From 996bf2db7ed9e9825b4e1ddd3d835231182950e7 Mon Sep 17 00:00:00 2001 From: rachaelvp Date: Fri, 30 Jun 2023 09:18:50 -0700 Subject: [PATCH 1/2] add weights to args depending on hal9001 version --- R/Lrnr_hal9001.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/Lrnr_hal9001.R b/R/Lrnr_hal9001.R index e9e5120e..34403283 100644 --- a/R/Lrnr_hal9001.R +++ b/R/Lrnr_hal9001.R @@ -92,7 +92,6 @@ Lrnr_hal9001 <- R6Class( verbose <- getOption("sl3.verbose") } - args$X <- as.matrix(task$X) outcome_type <- self$get_outcome_type(task) @@ -125,7 +124,11 @@ Lrnr_hal9001 <- R6Class( } if (task$has_node("weights")) { - args$weights <- task$weights + if (packageVersion("hal9001") >= "0.4.5") { + args$weights <- task$weights + } else { + args$fit_control$weights <- task$weights + } } if (task$has_node("offset")) { From 9fae3f27d39fe1a5f08d4ca9ece415175f905314 Mon Sep 17 00:00:00 2001 From: rachaelvp Date: Fri, 30 Jun 2023 09:19:00 -0700 Subject: [PATCH 2/2] make pr changes --- R/Lrnr_cv.R | 10 ++--- R/Lrnr_dbarts.R | 10 ++--- R/Lrnr_density_discretize.R | 15 ++++--- R/Lrnr_density_hse.R | 13 +++--- R/Lrnr_density_semiparametric.R | 13 +++--- R/Lrnr_grf.R | 15 ++++--- R/Lrnr_gts.R | 8 ++-- R/Lrnr_h2o_glm.R | 15 ++++--- R/Lrnr_h2o_grid.R | 16 ++++--- R/Lrnr_independent_binomial.R | 15 ++++--- R/Lrnr_multiple_ts.R | 38 ++++++++-------- R/Lrnr_multivariate.R | 15 ++++--- R/Lrnr_nnet.R | 10 ++--- R/Lrnr_pca.R | 10 ++--- R/Lrnr_pooled_hazards.R | 10 ++--- R/Lrnr_screener_augment.R | 14 +++--- R/Lrnr_screener_coefs.R | 16 +++---- R/Lrnr_screener_correlation.R | 16 +++---- R/Lrnr_stratified.R | 12 ++--- R/Lrnr_subset_covariates.R | 14 +++--- man/Lrnr_cv.Rd | 19 ++++++++ man/Lrnr_dbarts.Rd | 15 +++++++ man/Lrnr_density_discretize.Rd | 17 +++++++ man/Lrnr_density_hse.Rd | 15 +++++++ man/Lrnr_density_semiparametric.Rd | 17 +++++++ man/Lrnr_grf.Rd | 15 +++++++ man/Lrnr_gts.Rd | 25 +++++++++++ man/Lrnr_h2o_glm.Rd | 18 ++++++++ man/Lrnr_h2o_grid.Rd | 23 ++++++++++ 
man/Lrnr_independent_binomial.Rd | 20 +++++++++ man/Lrnr_multiple_ts.Rd | 47 ++++++++++++++++++++ man/Lrnr_multivariate.Rd | 27 +++++++++++ man/Lrnr_nnet.Rd | 16 +++++++ man/Lrnr_pca.Rd | 24 ++++++++++ man/Lrnr_pooled_hazards.Rd | 29 ++++++++++++ man/Lrnr_screener_augment.Rd | 29 ++++++++++++ man/Lrnr_screener_coefs.Rd | 29 ++++++++++++ man/Lrnr_screener_correlation.Rd | 29 ++++++++++++ man/Lrnr_stratified.Rd | 22 +++++++++ man/Lrnr_subset_covariates.Rd | 22 +++++++++ tests/testthat/test-add_many_columns.R | 4 +- tests/testthat/test-bartMachine.R | 8 ++-- tests/testthat/test-binomial_learners.R | 2 +- tests/testthat/test-caret.R | 6 +-- tests/testthat/test-character_covariates.R | 4 +- tests/testthat/test-cv_sl.R | 8 ++-- tests/testthat/test-dbarts.R | 2 +- tests/testthat/test-delayed_sl3.R | 9 ++-- tests/testthat/test-density-pooled_hazards.R | 9 ++-- tests/testthat/test-density-semiparametric.R | 30 ++++++------- tests/testthat/test-density_hse.R | 17 +++---- tests/testthat/test-density_utils.R | 14 +++--- tests/testthat/test-ga.R | 6 +-- tests/testthat/test-lightgbm.R | 3 +- tests/testthat/test-mean.R | 4 +- tests/testthat/test-nnet.R | 12 +++-- tests/testthat/test-offset.R | 10 ++--- tests/testthat/test-pipeline.R | 14 +++--- tests/testthat/test-randomForest.R | 18 ++++---- tests/testthat/test-ranger.R | 15 ++++--- tests/testthat/test-rugarch.R | 26 ++++++----- tests/testthat/test-sl-timeseries.R | 19 ++++---- tests/testthat/test-sl_fold.R | 8 ++-- 63 files changed, 736 insertions(+), 255 deletions(-) diff --git a/R/Lrnr_cv.R b/R/Lrnr_cv.R index c626278e..00c65b64 100644 --- a/R/Lrnr_cv.R +++ b/R/Lrnr_cv.R @@ -76,22 +76,22 @@ interpret_fold_number <- function(fold_number) { #' This can then be accessed with predict_fold(task, fold_number="full") #' } #' } -#' -#' @examples +#' +#' @examples #' library(origami) -#' +#' #' # load example data #' data(cpp_imputed) #' covars <- c( #' "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn" #' ) #' 
outcome <- "haz" -#' +#' #' # create sl3 task #' task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) #' glm_learner <- Lrnr_glm$new() #' cv_glm <- Lrnr_cv$new(glm_learner, folds = make_folds(cpp_imputed, V = 10)) -#' +#' #' # train cv learner #' cv_glm_fit <- cv_glm$train(task) #' preds <- cv_glm_fit$predict() diff --git a/R/Lrnr_dbarts.R b/R/Lrnr_dbarts.R index 4fbf1003..11dd33dc 100644 --- a/R/Lrnr_dbarts.R +++ b/R/Lrnr_dbarts.R @@ -99,18 +99,18 @@ #' } #' #' @template common_parameters -#' -#' @examples +#' +#' @examples #' set.seed(123) -#' +#' #' # load example data #' data(cpp_imputed) #' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs") -#' +#' #' # create sl3 task #' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz") #' dbart_learner <- make_learner(Lrnr_dbarts, ndpost = 200) -#' +#' #' # train dbart learner and make predictions #' dbart_fit <- dbart_learner$train(task) #' preds <- dbart_fit$predict() diff --git a/R/Lrnr_density_discretize.R b/R/Lrnr_density_discretize.R index af02846a..20160f39 100644 --- a/R/Lrnr_density_discretize.R +++ b/R/Lrnr_density_discretize.R @@ -23,17 +23,18 @@ #' } #' #' @template common_parameters -#' -#' @examples +#' +#' @examples #' # load example data #' data(cpp_imputed) -#' +#' #' # create sl3 task #' task <- sl3_Task$new( -#' cpp_imputed, -#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), -#' outcome = "haz") -#' +#' cpp_imputed, +#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), +#' outcome = "haz" +#' ) +#' #' # train density discretize learner and make predictions #' lrnr_discretize <- Lrnr_density_discretize$new( #' categorical_learner = Lrnr_glmnet$new() diff --git a/R/Lrnr_density_hse.R b/R/Lrnr_density_hse.R index cde26515..3c9e0459 100644 --- a/R/Lrnr_density_hse.R +++ b/R/Lrnr_density_hse.R @@ -26,16 +26,17 @@ #' #' @template common_parameters #' -#' @examples +#' @examples #' # load 
example data #' data(cpp_imputed) -#' +#' #' # create sl3 task #' task <- sl3_Task$new( -#' cpp_imputed, -#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), -#' outcome = "haz") -#' +#' cpp_imputed, +#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), +#' outcome = "haz" +#' ) +#' #' # train density hse learner and make predictions #' lrnr_density_hse <- Lrnr_density_hse$new(mean_learner = Lrnr_glm$new()) #' fit_density_hse <- lrnr_density_hse$train(task) diff --git a/R/Lrnr_density_semiparametric.R b/R/Lrnr_density_semiparametric.R index 8cd9fe95..916f112d 100644 --- a/R/Lrnr_density_semiparametric.R +++ b/R/Lrnr_density_semiparametric.R @@ -26,16 +26,17 @@ #' #' @template common_parameters #' -#' @examples +#' @examples #' # load example data #' data(cpp_imputed) -#' +#' #' # create sl3 task #' task <- sl3_Task$new( -#' cpp_imputed, -#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), -#' outcome = "haz") -#' +#' cpp_imputed, +#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), +#' outcome = "haz" +#' ) +#' #' # train density hse learner and make predictions #' lrnr_density_semi <- Lrnr_density_semiparametric$new( #' mean_learner = Lrnr_glm$new() diff --git a/R/Lrnr_grf.R b/R/Lrnr_grf.R index 95a61a54..5ef96fe2 100644 --- a/R/Lrnr_grf.R +++ b/R/Lrnr_grf.R @@ -68,17 +68,18 @@ #' } #' #' @template common_parameters -#' -#' @examples +#' +#' @examples #' # load example data #' data(cpp_imputed) -#' +#' #' # create sl3 task #' task <- sl3_Task$new( -#' cpp_imputed, -#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), -#' outcome = "haz") -#' +#' cpp_imputed, +#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), +#' outcome = "haz" +#' ) +#' #' # train grf learner and make predictions #' lrnr_grf <- Lrnr_grf$new(seed = 123) #' lrnr_grf_fit <- lrnr_grf$train(task) diff --git a/R/Lrnr_gts.R 
b/R/Lrnr_gts.R index b9c7ee80..cdf1f818 100644 --- a/R/Lrnr_gts.R +++ b/R/Lrnr_gts.R @@ -59,12 +59,12 @@ #' are going to be used.} #' } #' -#' @examples +#' @examples #' # Example adapted from hts package manual #' # The hierarchical structure looks like 2 child nodes associated with level 1, #' # which are followed by 3 and 2 sub-child nodes respectively at level 2. #' library(hts) -#' +#' #' set.seed(3274) #' abc <- as.data.table(5 + matrix(sort(rnorm(200)), ncol = 4, nrow = 50)) #' setnames(abc, paste("Series", 1:ncol(abc), sep = "_")) @@ -72,14 +72,14 @@ #' grps <- rbind(c(1, 1, 2, 2), c(1, 2, 1, 2)) #' horizon <- 12 #' suppressWarnings(abc_long <- melt(abc, id = "time", variable.name = "series")) -#' +#' #' # create sl3 task (no outcome for hierarchical/grouped series) #' node_list <- list(outcome = "value", time = "time", id = "series") #' train_task <- sl3_Task$new(data = abc_long, nodes = node_list) #' test_data <- expand.grid(time = 51:55, series = unique(abc_long$series)) #' test_data <- as.data.table(test_data)[, value := 0] #' test_task <- sl3_Task$new(data = test_data, nodes = node_list) -#' +#' #' gts_learner <- Lrnr_gts$new() #' gts_learner_fit <- gts_learner$train(train_task) #' gts_learner_preds <- gts_learner_fit$predict(test_task) diff --git a/R/Lrnr_h2o_glm.R b/R/Lrnr_h2o_glm.R index 896a9f6c..6aec023c 100644 --- a/R/Lrnr_h2o_glm.R +++ b/R/Lrnr_h2o_glm.R @@ -69,19 +69,20 @@ define_h2o_X <- function(task, outcome_type = NULL) { #' #' @template common_parameters #' -#' @examples +#' @examples #' library(h2o) #' suppressWarnings(h2o.init()) -#' +#' #' # load example data #' data(cpp_imputed) -#' +#' #' # create sl3 task #' task <- sl3_Task$new( -#' cpp_imputed, -#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), -#' outcome = "haz") -#' +#' cpp_imputed, +#' covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), +#' outcome = "haz" +#' ) +#' #' # train h2o glm learner and make predictions #' 
lrnr_h2o <- Lrnr_h2o_glm$new() #' lrnr_h2o_fit <- lrnr_h2o$train(task) diff --git a/R/Lrnr_h2o_grid.R b/R/Lrnr_h2o_grid.R index 8ee03367..6c4b43d5 100644 --- a/R/Lrnr_h2o_grid.R +++ b/R/Lrnr_h2o_grid.R @@ -43,12 +43,12 @@ #' } #' #' @template common_parameters -#' -#' @examples +#' +#' @examples #' library(h2o) #' suppressWarnings(h2o.init()) #' set.seed(1) -#' +#' #' # load example data #' data(cpp_imputed) #' covars <- c( @@ -57,13 +57,15 @@ #' ) #' outcome <- "haz" #' cpp_imputed <- cpp_imputed[1:150, ] -#' +#' #' # create sl3 task #' task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) -#' +#' #' # h2o grid search hyperparameter alpha -#' h2o_glm_grid <- Lrnr_h2o_grid$new(algorithm = "glm", -#' hyper_params = list(alpha = c(0, 0.5))) +#' h2o_glm_grid <- Lrnr_h2o_grid$new( +#' algorithm = "glm", +#' hyper_params = list(alpha = c(0, 0.5)) +#' ) #' h2o_glm_grid_fit <- h2o_glm_grid$train(task) #' pred <- h2o_glm_grid_fit$predict() Lrnr_h2o_grid <- R6Class( diff --git a/R/Lrnr_independent_binomial.R b/R/Lrnr_independent_binomial.R index 3ed59aa5..1a349465 100644 --- a/R/Lrnr_independent_binomial.R +++ b/R/Lrnr_independent_binomial.R @@ -27,21 +27,22 @@ #' #' @template common_parameters #' -#' @examples +#' @examples #' library(dplyr) -#' +#' #' # load example data #' data(cpp) #' cpp <- cpp %>% #' select(c(bmi, agedays, feeding)) %>% #' mutate(feeding = as.factor(feeding)) %>% #' na.omit() -#' +#' #' # create sl3 task -#' task <- make_sl3_Task(cpp, -#' covariates = c("agedays", "bmi"), -#' outcome = "feeding") -#' +#' task <- make_sl3_Task(cpp, +#' covariates = c("agedays", "bmi"), +#' outcome = "feeding" +#' ) +#' #' # train independent binomial learner and make predictions #' lrnr_indbinomial <- make_learner(Lrnr_independent_binomial) #' fit <- lrnr_indbinomial$train(task) diff --git a/R/Lrnr_multiple_ts.R b/R/Lrnr_multiple_ts.R index 188a195b..92d7d82e 100644 --- a/R/Lrnr_multiple_ts.R +++ b/R/Lrnr_multiple_ts.R @@ -26,13 +26,13 @@ #' 
\code{learner$train}. See its documentation for details. #' } #' } -#' -#' @examples +#' +#' @examples #' library(origami) #' library(dplyr) -#' +#' #' set.seed(123) -#' +#' #' # Simulate simple AR(2) process #' data <- matrix(arima.sim(model = list(ar = c(.9, -.2)), n = 200)) #' id <- c(rep("Series_1", 50), rep("Series_2", 50), rep("Series_3", 50), rep("Series_4", 50)) @@ -41,36 +41,36 @@ #' data <- data %>% #' group_by(id) %>% #' dplyr::mutate(time = 1:n()) -#' +#' #' data$W1 <- rbinom(200, 1, 0.6) #' data$W2 <- rbinom(200, 1, 0.2) -#' +#' #' data <- as.data.table(data) -#' +#' #' folds <- origami::make_folds(data, -#' t = max(data$time), -#' id = data$id, -#' time = data$time, -#' fold_fun = folds_rolling_window_pooled, -#' window_size = 20, -#' validation_size = 15, -#' gap = 0, -#' batch = 10 +#' t = max(data$time), +#' id = data$id, +#' time = data$time, +#' fold_fun = folds_rolling_window_pooled, +#' window_size = 20, +#' validation_size = 15, +#' gap = 0, +#' batch = 10 #' ) -#' +#' #' task <- sl3_Task$new( #' data = data, outcome = "data", #' time = "time", id = "id", #' covariates = c("W1", "W2"), #' folds = folds #' ) -#' +#' #' train_task <- training(task, fold = task$folds[[1]]) #' valid_task <- validation(task, fold = task$folds[[1]]) -#' +#' #' lrnr_arima <- Lrnr_arima$new() #' multiple_ts_arima <- Lrnr_multiple_ts$new(learner = lrnr_arima) -#' +#' #' multiple_ts_arima_fit <- multiple_ts_arima$train(train_task) #' multiple_ts_arima_preds <- multiple_ts_arima_fit$predict(valid_task) Lrnr_multiple_ts <- R6Class( diff --git a/R/Lrnr_multivariate.R b/R/Lrnr_multivariate.R index 8c452128..e87c27be 100644 --- a/R/Lrnr_multivariate.R +++ b/R/Lrnr_multivariate.R @@ -25,10 +25,10 @@ #' } #' #' @template common_parameters -#' -#' @examples +#' +#' @examples #' library(data.table) -#' +#' #' # simulate data #' set.seed(123) #' n <- 1000 @@ -41,12 +41,13 @@ #' data <- data.table(W, Y) #' covariates <- grep("W", names(data), value = TRUE) #' outcomes <- grep("Y", 
names(data), value = TRUE) -#' +#' #' # make sl3 task #' task <- sl3_Task$new(data.table::copy(data), -#' covariates = covariates, -#' outcome = outcomes) -#' +#' covariates = covariates, +#' outcome = outcomes +#' ) +#' #' # train multivariate learner and make predictions #' mv_learner <- make_learner(Lrnr_multivariate, make_learner(Lrnr_glm_fast)) #' mv_fit <- mv_learner$train(task) diff --git a/R/Lrnr_nnet.R b/R/Lrnr_nnet.R index f42ffddd..271eb9d6 100644 --- a/R/Lrnr_nnet.R +++ b/R/Lrnr_nnet.R @@ -33,18 +33,18 @@ #' } #' #' @template common_parameters -#' -#' @examples +#' +#' @examples #' set.seed(123) -#' +#' #' # load example data #' data(cpp_imputed) #' covars <- c("bmi", "parity", "mage", "sexn") #' outcome <- "haz" -#' +#' #' # create sl3 task #' task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) -#' +#' #' # train neural networks and make predictions #' lrnr_nnet <- Lrnr_nnet$new(linout = TRUE, size = 10, maxit = 1000) #' fit <- lrnr_nnet$train(task) diff --git a/R/Lrnr_pca.R b/R/Lrnr_pca.R index cfe39377..be7b971c 100644 --- a/R/Lrnr_pca.R +++ b/R/Lrnr_pca.R @@ -40,9 +40,9 @@ #' #' @template common_parameters #' -#' @examples +#' @examples #' set.seed(37912) -#' +#' #' # load example data #' ncomp <- 3 #' data(cpp_imputed) @@ -51,15 +51,15 @@ #' "sexn" #' ) #' outcome <- "haz" -#' +#' #' # create sl3 task #' task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) -#' +#' #' # define learners #' glm_fast <- Lrnr_glm_fast$new(intercept = FALSE) #' pca_sl3 <- Lrnr_pca$new(n_comp = ncomp, center = TRUE, scale. 
= TRUE) #' pcr_pipe_sl3 <- Pipeline$new(pca_sl3, glm_fast) -#' +#' #' # create stacks + train and predict #' pcr_pipe_sl3_fit <- pcr_pipe_sl3$train(task) #' pcr_pred <- pcr_pipe_sl3_fit$predict() diff --git a/R/Lrnr_pooled_hazards.R b/R/Lrnr_pooled_hazards.R index 63666c2f..84b7a396 100644 --- a/R/Lrnr_pooled_hazards.R +++ b/R/Lrnr_pooled_hazards.R @@ -25,18 +25,18 @@ #' } #' #' @template common_parameters -#' -#' @examples +#' +#' @examples #' library(data.table) #' set.seed(74294) -#' +#' #' n <- 500 #' x <- rnorm(n) #' epsilon <- rnorm(n) #' y <- 3 * x + epsilon #' data <- data.table(x = x, y = y) #' task <- sl3_Task$new(data, covariates = c("x"), outcome = "y") -#' +#' #' # instantiate learners #' hal <- Lrnr_hal9001$new( #' lambda = exp(seq(-1, -13, length = 100)), @@ -49,7 +49,7 @@ #' type = "equal_range", #' n_bins = 5 #' ) -#' +#' #' # fit discrete density model to pooled hazards data #' set.seed(74294) #' fit_density <- density_learner$train(task) diff --git a/R/Lrnr_screener_augment.R b/R/Lrnr_screener_augment.R index 39210de0..e3be1989 100644 --- a/R/Lrnr_screener_augment.R +++ b/R/Lrnr_screener_augment.R @@ -26,10 +26,10 @@ #' whether or not these covariates were selected by the screener.} #' \item{\code{...}}{Other parameters passed to \code{screener}.} #' } -#' -#' @examples +#' +#' @examples #' library(data.table) -#' +#' #' # load example data #' data(cpp_imputed) #' setDT(cpp_imputed) @@ -39,13 +39,13 @@ #' "sexn" #' ) #' outcome <- "haz" -#' +#' #' # create sl3 task #' task <- sl3_Task$new(data.table::copy(cpp_imputed), -#' covariates = covars, -#' outcome = outcome +#' covariates = covars, +#' outcome = outcome #' ) -#' +#' #' screener_cor <- make_learner( #' Lrnr_screener_correlation, #' type = "rank", diff --git a/R/Lrnr_screener_coefs.R b/R/Lrnr_screener_coefs.R index c0a716d6..f6071638 100644 --- a/R/Lrnr_screener_coefs.R +++ b/R/Lrnr_screener_coefs.R @@ -28,10 +28,10 @@ #' applicable when supplied \code{learner} is a 
\code{\link{Lrnr_glmnet}}.} #' \item{\code{...}}{Other parameters passed to \code{learner}.} #' } -#' -#' @examples +#' +#' @examples #' library(data.table) -#' +#' #' # load example data #' data(cpp_imputed) #' setDT(cpp_imputed) @@ -41,18 +41,18 @@ #' "sexn" #' ) #' outcome <- "haz" -#' +#' #' # create sl3 task #' task <- sl3_Task$new(data.table::copy(cpp_imputed), -#' covariates = covars, -#' outcome = outcome +#' covariates = covars, +#' outcome = outcome #' ) -#' +#' #' lrnr_glmnet <- make_learner(Lrnr_glmnet) #' lrnr_glm <- make_learner(Lrnr_glm) #' lrnr_mean <- make_learner(Lrnr_mean) #' lrnrs <- make_learner(Stack, lrnr_glm, lrnr_mean) -#' +#' #' glm_screener <- make_learner(Lrnr_screener_coefs, lrnr_glm, max_screen = 2) #' glm_screener_pipeline <- make_learner(Pipeline, glm_screener, lrnrs) #' fit_glm_screener_pipeline <- glm_screener_pipeline$train(task) diff --git a/R/Lrnr_screener_correlation.R b/R/Lrnr_screener_correlation.R index ad4aa8c0..2d43fa9d 100644 --- a/R/Lrnr_screener_correlation.R +++ b/R/Lrnr_screener_correlation.R @@ -34,10 +34,10 @@ #' \item{\code{min_screen = 2}}{Minimum number of covariates to select. 
Used #' in pvalue_threshold screening procedure.} #' } -#' -#' @examples +#' +#' @examples #' library(data.table) -#' +#' #' # load example data #' data(cpp_imputed) #' setDT(cpp_imputed) @@ -47,18 +47,18 @@ #' "sexn" #' ) #' outcome <- "haz" -#' +#' #' # create sl3 task #' task <- sl3_Task$new(data.table::copy(cpp_imputed), -#' covariates = covars, -#' outcome = outcome +#' covariates = covars, +#' outcome = outcome #' ) -#' +#' #' lrnr_glmnet <- make_learner(Lrnr_glmnet) #' lrnr_glm <- make_learner(Lrnr_glm) #' lrnr_mean <- make_learner(Lrnr_mean) #' lrnrs <- make_learner(Stack, lrnr_glm, lrnr_mean) -#' +#' #' screen_corP <- make_learner(Lrnr_screener_correlation, type = "threshold") #' corP_pipeline <- make_learner(Pipeline, screen_corP, lrnrs) #' fit_corP <- corP_pipeline$train(task) diff --git a/R/Lrnr_stratified.R b/R/Lrnr_stratified.R index 0e367952..fa44d6c3 100644 --- a/R/Lrnr_stratified.R +++ b/R/Lrnr_stratified.R @@ -26,24 +26,24 @@ #' \code{learner$train}. See its documentation for details. 
#' } #' } -#' -#' @examples +#' +#' @examples #' library(data.table) -#' +#' #' # load example data set #' data(cpp_imputed) #' setDT(cpp_imputed) -#' +#' #' # use covariates of intest and the outcome to build a task object #' covars <- c("apgar1", "apgar5", "sexn") #' task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = "haz") -#' +#' #' hal_lrnr <- Lrnr_hal9001$new(fit_control = list(n_folds = 3)) #' stratified_hal <- Lrnr_stratified$new( #' learner = hal_lrnr, #' variable_stratify = "sexn" #' ) -#' +#' #' # stratified learner #' set.seed(123) #' stratified_hal_fit <- stratified_hal$train(task) diff --git a/R/Lrnr_subset_covariates.R b/R/Lrnr_subset_covariates.R index 4ef4a800..3ec08814 100644 --- a/R/Lrnr_subset_covariates.R +++ b/R/Lrnr_subset_covariates.R @@ -25,25 +25,25 @@ #' #' @template common_parameters #' -#' @examples +#' @examples #' # load example data #' data(cpp_imputed) #' covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") #' outcome <- "haz" -#' +#' #' # create sl3 task #' task <- sl3_Task$new(data.table::copy(cpp_imputed), -#' covariates = covars, -#' outcome = outcome, -#' folds = origami::make_folds(cpp_imputed, V = 3) +#' covariates = covars, +#' outcome = outcome, +#' folds = origami::make_folds(cpp_imputed, V = 3) #' ) -#' +#' #' glm_learner <- Lrnr_glm$new() #' glmnet_learner <- Lrnr_glmnet$new() #' subset_apgar <- Lrnr_subset_covariates$new(covariates = c("apgar1", "apgar5")) #' learners <- list(glm_learner, glmnet_learner, subset_apgar) #' sl <- make_learner(Lrnr_sl, learners, glm_learner) -#' +#' #' sl_fit <- sl$train(task) #' sl_pred <- sl_fit$predict() Lrnr_subset_covariates <- R6Class( diff --git a/man/Lrnr_cv.Rd b/man/Lrnr_cv.Rd index b6eeaa6c..a2866897 100644 --- a/man/Lrnr_cv.Rd +++ b/man/Lrnr_cv.Rd @@ -26,6 +26,25 @@ This can then be accessed with predict_fold(task, fold_number="full") } } +\examples{ +library(origami) + +# load example data +data(cpp_imputed) +covars <- c( + "apgar1", 
"apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn" +) +outcome <- "haz" + +# create sl3 task +task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) +glm_learner <- Lrnr_glm$new() +cv_glm <- Lrnr_cv$new(glm_learner, folds = make_folds(cpp_imputed, V = 10)) + +# train cv learner +cv_glm_fit <- cv_glm$train(task) +preds <- cv_glm_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_dbarts.Rd b/man/Lrnr_dbarts.Rd index 6a521c30..27c57ef3 100644 --- a/man/Lrnr_dbarts.Rd +++ b/man/Lrnr_dbarts.Rd @@ -108,6 +108,21 @@ by all learners. } } +\examples{ +set.seed(123) + +# load example data +data(cpp_imputed) +covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs") + +# create sl3 task +task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz") +dbart_learner <- make_learner(Lrnr_dbarts, ndpost = 200) + +# train dbart learner and make predictions +dbart_fit <- dbart_learner$train(task) +preds <- dbart_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_density_discretize.Rd b/man/Lrnr_density_discretize.Rd index 105b76b0..53a4627a 100644 --- a/man/Lrnr_density_discretize.Rd +++ b/man/Lrnr_density_discretize.Rd @@ -34,6 +34,23 @@ by all learners. 
} } +\examples{ +# load example data +data(cpp_imputed) + +# create sl3 task +task <- sl3_Task$new( + cpp_imputed, + covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), + outcome = "haz") + +# train density discretize learner and make predictions +lrnr_discretize <- Lrnr_density_discretize$new( + categorical_learner = Lrnr_glmnet$new() +) +lrnr_discretize_fit <- lrnr_discretize$train(task) +lrnr_discretize_pred <- lrnr_discretize_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_density_hse.Rd b/man/Lrnr_density_hse.Rd index d0541bfa..3290c560 100644 --- a/man/Lrnr_density_hse.Rd +++ b/man/Lrnr_density_hse.Rd @@ -35,6 +35,21 @@ by all learners. } } +\examples{ +# load example data +data(cpp_imputed) + +# create sl3 task +task <- sl3_Task$new( + cpp_imputed, + covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), + outcome = "haz") + +# train density hse learner and make predictions +lrnr_density_hse <- Lrnr_density_hse$new(mean_learner = Lrnr_glm$new()) +fit_density_hse <- lrnr_density_hse$train(task) +preds_density_hse <- fit_density_hse$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_density_semiparametric.Rd b/man/Lrnr_density_semiparametric.Rd index acaca399..76bb28a9 100644 --- a/man/Lrnr_density_semiparametric.Rd +++ b/man/Lrnr_density_semiparametric.Rd @@ -35,6 +35,23 @@ by all learners. 
} } +\examples{ +# load example data +data(cpp_imputed) + +# create sl3 task +task <- sl3_Task$new( + cpp_imputed, + covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), + outcome = "haz") + +# train density hse learner and make predictions +lrnr_density_semi <- Lrnr_density_semiparametric$new( + mean_learner = Lrnr_glm$new() +) +lrnr_density_semi_fit <- lrnr_density_semi$train(task) +lrnr_density_semi_pred <- lrnr_density_semi_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_grf.Rd b/man/Lrnr_grf.Rd index 147460b4..d3ab470b 100644 --- a/man/Lrnr_grf.Rd +++ b/man/Lrnr_grf.Rd @@ -77,6 +77,21 @@ by all learners. } } +\examples{ +# load example data +data(cpp_imputed) + +# create sl3 task +task <- sl3_Task$new( + cpp_imputed, + covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), + outcome = "haz") + +# train grf learner and make predictions +lrnr_grf <- Lrnr_grf$new(seed = 123) +lrnr_grf_fit <- lrnr_grf$train(task) +lrnr_grf_pred <- lrnr_grf_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_gts.Rd b/man/Lrnr_gts.Rd index 6d5cbd16..b610af1c 100644 --- a/man/Lrnr_gts.Rd +++ b/man/Lrnr_gts.Rd @@ -57,6 +57,31 @@ are going to be used.} } } +\examples{ +# Example adapted from hts package manual +# The hierarchical structure looks like 2 child nodes associated with level 1, +# which are followed by 3 and 2 sub-child nodes respectively at level 2. 
+library(hts) + +set.seed(3274) +abc <- as.data.table(5 + matrix(sort(rnorm(200)), ncol = 4, nrow = 50)) +setnames(abc, paste("Series", 1:ncol(abc), sep = "_")) +abc[, time := .I] +grps <- rbind(c(1, 1, 2, 2), c(1, 2, 1, 2)) +horizon <- 12 +suppressWarnings(abc_long <- melt(abc, id = "time", variable.name = "series")) + +# create sl3 task (no outcome for hierarchical/grouped series) +node_list <- list(outcome = "value", time = "time", id = "series") +train_task <- sl3_Task$new(data = abc_long, nodes = node_list) +test_data <- expand.grid(time = 51:55, series = unique(abc_long$series)) +test_data <- as.data.table(test_data)[, value := 0] +test_task <- sl3_Task$new(data = test_data, nodes = node_list) + +gts_learner <- Lrnr_gts$new() +gts_learner_fit <- gts_learner$train(train_task) +gts_learner_preds <- gts_learner_fit$predict(test_task) +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_h2o_glm.Rd b/man/Lrnr_h2o_glm.Rd index 9c626b98..f12906e3 100644 --- a/man/Lrnr_h2o_glm.Rd +++ b/man/Lrnr_h2o_glm.Rd @@ -61,6 +61,24 @@ by all learners. } } +\examples{ +library(h2o) +suppressWarnings(h2o.init()) + +# load example data +data(cpp_imputed) + +# create sl3 task +task <- sl3_Task$new( + cpp_imputed, + covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), + outcome = "haz") + +# train h2o glm learner and make predictions +lrnr_h2o <- Lrnr_h2o_glm$new() +lrnr_h2o_fit <- lrnr_h2o$train(task) +lrnr_h2o_pred <- lrnr_h2o_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_h2o_grid.Rd b/man/Lrnr_h2o_grid.Rd index 38e65f02..6dde29d7 100644 --- a/man/Lrnr_h2o_grid.Rd +++ b/man/Lrnr_h2o_grid.Rd @@ -56,6 +56,29 @@ by all learners. 
} } +\examples{ +library(h2o) +suppressWarnings(h2o.init()) +set.seed(1) + +# load example data +data(cpp_imputed) +covars <- c( + "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", + "sexn" +) +outcome <- "haz" +cpp_imputed <- cpp_imputed[1:150, ] + +# create sl3 task +task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) + +# h2o grid search hyperparameter alpha +h2o_glm_grid <- Lrnr_h2o_grid$new(algorithm = "glm", + hyper_params = list(alpha = c(0, 0.5))) +h2o_glm_grid_fit <- h2o_glm_grid$train(task) +pred <- h2o_glm_grid_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_independent_binomial.Rd b/man/Lrnr_independent_binomial.Rd index f4af7502..cea7a0d7 100644 --- a/man/Lrnr_independent_binomial.Rd +++ b/man/Lrnr_independent_binomial.Rd @@ -36,6 +36,26 @@ by all learners. } } +\examples{ +library(dplyr) + +# load example data +data(cpp) +cpp <- cpp \%>\% + select(c(bmi, agedays, feeding)) \%>\% + mutate(feeding = as.factor(feeding)) \%>\% + na.omit() + +# create sl3 task +task <- make_sl3_Task(cpp, + covariates = c("agedays", "bmi"), + outcome = "feeding") + +# train independent binomial learner and make predictions +lrnr_indbinomial <- make_learner(Lrnr_independent_binomial) +fit <- lrnr_indbinomial$train(task) +preds <- fit$predict(task) +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_multiple_ts.Rd b/man/Lrnr_multiple_ts.Rd index a7c6ec44..145002af 100644 --- a/man/Lrnr_multiple_ts.Rd +++ b/man/Lrnr_multiple_ts.Rd @@ -29,6 +29,53 @@ variables with discrete levels coded as \code{numeric}. 
} } +\examples{ +library(origami) +library(dplyr) + +set.seed(123) + +# Simulate simple AR(2) process +data <- matrix(arima.sim(model = list(ar = c(.9, -.2)), n = 200)) +id <- c(rep("Series_1", 50), rep("Series_2", 50), rep("Series_3", 50), rep("Series_4", 50)) +data <- data.frame(data) +data$id <- as.factor(id) +data <- data \%>\% + group_by(id) \%>\% + dplyr::mutate(time = 1:n()) + +data$W1 <- rbinom(200, 1, 0.6) +data$W2 <- rbinom(200, 1, 0.2) + +data <- as.data.table(data) + +folds <- origami::make_folds(data, + t = max(data$time), + id = data$id, + time = data$time, + fold_fun = folds_rolling_window_pooled, + window_size = 20, + validation_size = 15, + gap = 0, + batch = 10 +) + +task <- sl3_Task$new( + data = data, outcome = "data", + time = "time", id = "id", + covariates = c("W1", "W2"), + folds = folds +) + +train_task <- training(task, fold = task$folds[[1]]) +valid_task <- validation(task, fold = task$folds[[1]]) + +lrnr_arima <- Lrnr_arima$new() +multiple_ts_arima <- Lrnr_multiple_ts$new(learner = lrnr_arima) + +multiple_ts_arima_fit <- multiple_ts_arima$train(train_task) +multiple_ts_arima_preds <- multiple_ts_arima_fit$predict(valid_task) +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_multivariate.Rd b/man/Lrnr_multivariate.Rd index 13823074..f5319908 100644 --- a/man/Lrnr_multivariate.Rd +++ b/man/Lrnr_multivariate.Rd @@ -35,6 +35,33 @@ by all learners. 
} } +\examples{ +library(data.table) + +# simulate data +set.seed(123) +n <- 1000 +p <- 5 +pY <- 3 +W <- matrix(rnorm(n * p), nrow = n) +colnames(W) <- sprintf("W\%d", seq_len(p)) +Y <- matrix(rnorm(n * pY, 0, 0.2) + W[, 1], nrow = n) +colnames(Y) <- sprintf("Y\%d", seq_len(pY)) +data <- data.table(W, Y) +covariates <- grep("W", names(data), value = TRUE) +outcomes <- grep("Y", names(data), value = TRUE) + +# make sl3 task +task <- sl3_Task$new(data.table::copy(data), + covariates = covariates, + outcome = outcomes) + +# train multivariate learner and make predictions +mv_learner <- make_learner(Lrnr_multivariate, make_learner(Lrnr_glm_fast)) +mv_fit <- mv_learner$train(task) +mv_pred <- mv_fit$predict(task) +mv_pred <- unpack_predictions(mv_pred) +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_nnet.Rd b/man/Lrnr_nnet.Rd index 832e1cf1..5e8d61cc 100644 --- a/man/Lrnr_nnet.Rd +++ b/man/Lrnr_nnet.Rd @@ -43,6 +43,22 @@ by all learners. } } +\examples{ +set.seed(123) + +# load example data +data(cpp_imputed) +covars <- c("bmi", "parity", "mage", "sexn") +outcome <- "haz" + +# create sl3 task +task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) + +# train neural networks and make predictions +lrnr_nnet <- Lrnr_nnet$new(linout = TRUE, size = 10, maxit = 1000) +fit <- lrnr_nnet$train(task) +preds <- fit$predict(task) +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_pca.Rd b/man/Lrnr_pca.Rd index b3fd5c86..ec89caec 100644 --- a/man/Lrnr_pca.Rd +++ b/man/Lrnr_pca.Rd @@ -49,6 +49,30 @@ by all learners. 
} } +\examples{ +set.seed(37912) + +# load example data +ncomp <- 3 +data(cpp_imputed) +covars <- c( + "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", + "sexn" +) +outcome <- "haz" + +# create sl3 task +task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) + +# define learners +glm_fast <- Lrnr_glm_fast$new(intercept = FALSE) +pca_sl3 <- Lrnr_pca$new(n_comp = ncomp, center = TRUE, scale. = TRUE) +pcr_pipe_sl3 <- Pipeline$new(pca_sl3, glm_fast) + +# create stacks + train and predict +pcr_pipe_sl3_fit <- pcr_pipe_sl3$train(task) +pcr_pred <- pcr_pipe_sl3_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_pooled_hazards.Rd b/man/Lrnr_pooled_hazards.Rd index 292c1dce..48f4e03f 100644 --- a/man/Lrnr_pooled_hazards.Rd +++ b/man/Lrnr_pooled_hazards.Rd @@ -35,6 +35,35 @@ by all learners. } } +\examples{ +library(data.table) +set.seed(74294) + +n <- 500 +x <- rnorm(n) +epsilon <- rnorm(n) +y <- 3 * x + epsilon +data <- data.table(x = x, y = y) +task <- sl3_Task$new(data, covariates = c("x"), outcome = "y") + +# instantiate learners +hal <- Lrnr_hal9001$new( + lambda = exp(seq(-1, -13, length = 100)), + max_degree = 6, + smoothness_orders = 0 +) +hazard_learner <- Lrnr_pooled_hazards$new(hal) +density_learner <- Lrnr_density_discretize$new( + hazard_learner, + type = "equal_range", + n_bins = 5 +) + +# fit discrete density model to pooled hazards data +set.seed(74294) +fit_density <- density_learner$train(task) +pred_density <- fit_density$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_screener_augment.Rd b/man/Lrnr_screener_augment.Rd index a84db27b..a0af7b63 100644 --- a/man/Lrnr_screener_augment.Rd +++ b/man/Lrnr_screener_augment.Rd @@ -26,6 +26,35 @@ whether or not these covariates were selected by the screener.} } } +\examples{ +library(data.table) + +# load example data +data(cpp_imputed) +setDT(cpp_imputed) +cpp_imputed[, parity_cat := 
factor(ifelse(parity < 4, parity, 4))] +covars <- c( + "apgar1", "apgar5", "parity_cat", "gagebrth", "mage", "meducyrs", + "sexn" +) +outcome <- "haz" + +# create sl3 task +task <- sl3_Task$new(data.table::copy(cpp_imputed), + covariates = covars, + outcome = outcome +) + +screener_cor <- make_learner( + Lrnr_screener_correlation, + type = "rank", + num_screen = 2 +) +screener_augment <- Lrnr_screener_augment$new(screener_cor, covars) +screener_fit <- screener_augment$train(task) +selected <- screener_fit$fit_object$selected +screener_selected <- screener_fit$fit_object$screener_selected +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_screener_coefs.Rd b/man/Lrnr_screener_coefs.Rd index f55f6ca2..40b8dbe1 100644 --- a/man/Lrnr_screener_coefs.Rd +++ b/man/Lrnr_screener_coefs.Rd @@ -28,6 +28,35 @@ applicable when supplied \code{learner} is a \code{\link{Lrnr_glmnet}}.} } } +\examples{ +library(data.table) + +# load example data +data(cpp_imputed) +setDT(cpp_imputed) +cpp_imputed[, parity_cat := factor(ifelse(parity < 4, parity, 4))] +covars <- c( + "apgar1", "apgar5", "parity_cat", "gagebrth", "mage", "meducyrs", + "sexn" +) +outcome <- "haz" + +# create sl3 task +task <- sl3_Task$new(data.table::copy(cpp_imputed), + covariates = covars, + outcome = outcome +) + +lrnr_glmnet <- make_learner(Lrnr_glmnet) +lrnr_glm <- make_learner(Lrnr_glm) +lrnr_mean <- make_learner(Lrnr_mean) +lrnrs <- make_learner(Stack, lrnr_glm, lrnr_mean) + +glm_screener <- make_learner(Lrnr_screener_coefs, lrnr_glm, max_screen = 2) +glm_screener_pipeline <- make_learner(Pipeline, glm_screener, lrnrs) +fit_glm_screener_pipeline <- glm_screener_pipeline$train(task) +preds_glm_screener_pipeline <- fit_glm_screener_pipeline$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_screener_correlation.Rd b/man/Lrnr_screener_correlation.Rd index 8d621145..5c93d11c 100644 --- a/man/Lrnr_screener_correlation.Rd +++ 
b/man/Lrnr_screener_correlation.Rd @@ -34,6 +34,35 @@ in pvalue_threshold screening procedure.} } } +\examples{ +library(data.table) + +# load example data +data(cpp_imputed) +setDT(cpp_imputed) +cpp_imputed[, parity_cat := factor(ifelse(parity < 4, parity, 4))] +covars <- c( + "apgar1", "apgar5", "parity_cat", "gagebrth", "mage", "meducyrs", + "sexn" +) +outcome <- "haz" + +# create sl3 task +task <- sl3_Task$new(data.table::copy(cpp_imputed), + covariates = covars, + outcome = outcome +) + +lrnr_glmnet <- make_learner(Lrnr_glmnet) +lrnr_glm <- make_learner(Lrnr_glm) +lrnr_mean <- make_learner(Lrnr_mean) +lrnrs <- make_learner(Stack, lrnr_glm, lrnr_mean) + +screen_corP <- make_learner(Lrnr_screener_correlation, type = "threshold") +corP_pipeline <- make_learner(Pipeline, screen_corP, lrnrs) +fit_corP <- corP_pipeline$train(task) +preds_corP_screener <- fit_corP$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_stratified.Rd b/man/Lrnr_stratified.Rd index d938e666..457a68ce 100644 --- a/man/Lrnr_stratified.Rd +++ b/man/Lrnr_stratified.Rd @@ -29,6 +29,28 @@ variables with discrete levels coded as \code{numeric}. 
} } +\examples{ +library(data.table) + +# load example data set +data(cpp_imputed) +setDT(cpp_imputed) + +# use covariates of interest and the outcome to build a task object +covars <- c("apgar1", "apgar5", "sexn") +task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = "haz") + +hal_lrnr <- Lrnr_hal9001$new(fit_control = list(n_folds = 3)) +stratified_hal <- Lrnr_stratified$new( + learner = hal_lrnr, + variable_stratify = "sexn" +) + +# stratified learner +set.seed(123) +stratified_hal_fit <- stratified_hal$train(task) +stratified_prediction <- stratified_hal_fit$predict(task = task) +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/man/Lrnr_subset_covariates.Rd b/man/Lrnr_subset_covariates.Rd index 82dc2269..549072e8 100644 --- a/man/Lrnr_subset_covariates.Rd +++ b/man/Lrnr_subset_covariates.Rd @@ -35,6 +35,28 @@ by all learners. } } +\examples{ +# load example data +data(cpp_imputed) +covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") +outcome <- "haz" + +# create sl3 task +task <- sl3_Task$new(data.table::copy(cpp_imputed), + covariates = covars, + outcome = outcome, + folds = origami::make_folds(cpp_imputed, V = 3) +) + +glm_learner <- Lrnr_glm$new() +glmnet_learner <- Lrnr_glmnet$new() +subset_apgar <- Lrnr_subset_covariates$new(covariates = c("apgar1", "apgar5")) +learners <- list(glm_learner, glmnet_learner, subset_apgar) +sl <- make_learner(Lrnr_sl, learners, glm_learner) + +sl_fit <- sl$train(task) +sl_pred <- sl_fit$predict() +} \seealso{ Other Learners: \code{\link{Custom_chain}}, diff --git a/tests/testthat/test-add_many_columns.R b/tests/testthat/test-add_many_columns.R index 47956467..655f3628 100644 --- a/tests/testthat/test-add_many_columns.R +++ b/tests/testthat/test-add_many_columns.R @@ -8,9 +8,9 @@ test_that("columns were added successful", { data(cpp_imputed) covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") outcome <- "haz" - + task <- 
sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) - + for (i in 1:1e4) { new_data <- data.table(A = rnorm(task$nrow)) z <- task$add_columns(new_data) diff --git a/tests/testthat/test-bartMachine.R b/tests/testthat/test-bartMachine.R index cbc4fd44..65f65409 100644 --- a/tests/testthat/test-bartMachine.R +++ b/tests/testthat/test-bartMachine.R @@ -19,14 +19,14 @@ test_that("Lrnr_bartMachine produces results matching those of bartMachine::bart fit_sl3 <- lrnr_bartMachine$train(task) preds_sl3 <- fit_sl3$predict(task) rmse_sl3 <- sqrt(mean((preds_sl3 - task$Y)^2)) - + # classic fit fit_classic <- bartMachine::bartMachine( X = data.frame(task$X), y = task$Y, seed = 196, verbose = FALSE ) preds_classic <- as.numeric(predict(fit_classic, new_data = task$X)) rmse_classic <- sqrt(mean((preds_classic - task$Y)^2)) - + # check equality expect_equal(rmse_sl3, rmse_classic, tolerance = 0.1) }) @@ -36,10 +36,10 @@ test_that("Lrnr_bartMachine does not fail when cross-validated", { Lrnr_bartMachine, verbose = FALSE )) - + cv_lrnr_bartMachine <- Lrnr_cv$new(lrnr_bartMachine) fit_cv <- cv_lrnr_bartMachine$train(task) preds_cv <- fit_cv$predict(task) - + expect_equal(length(preds_cv), nrow(task$data)) }) diff --git a/tests/testthat/test-binomial_learners.R b/tests/testthat/test-binomial_learners.R index abbd68a8..7ce52ae1 100644 --- a/tests/testthat/test-binomial_learners.R +++ b/tests/testthat/test-binomial_learners.R @@ -68,6 +68,6 @@ test_that("Lrnr_sl binomial integration test", { coefs <- coef(sl_fit$fit_object$cv_meta_fit) preds <- sl_fit$predict() loss <- sl_fit$cv_risk(loss_loglik_binomial) - + expect_equal(length(preds), nrow(task$data)) }) diff --git a/tests/testthat/test-caret.R b/tests/testthat/test-caret.R index bd487024..2aeab689 100644 --- a/tests/testthat/test-caret.R +++ b/tests/testthat/test-caret.R @@ -94,7 +94,7 @@ test_that("Lrnr_caret RF match caret RF preds for binary classification", { fit_lrnr_caret_rf <- lrnr_caret_rf$train(task_binaryY) 
prd_lrnr_caret_rf <- fit_lrnr_caret_rf$predict() prd_lrnr_caret_rf <- as.numeric(prd_lrnr_caret_rf > 0.5) - + ## fit caret RF using the data from the task set.seed(1530) fit_caret_rf <- suppressWarnings(caret::train( @@ -108,7 +108,7 @@ test_that("Lrnr_caret RF match caret RF preds for binary classification", { predict(fit_caret_rf, newdata = task$X, type = "prob")[, 2] ) prd_caret_rf <- as.numeric(prd_caret_rf > 0.5) - + expect_equal(sum(abs(prd_lrnr_caret_rf - prd_caret_rf)), 0, tolerance = 1) }) @@ -161,6 +161,6 @@ test_that("Lrnr_caret RF preds match caret RF preds for binary regression", { )) prd_caret_rf <- as.numeric(predict(fit_caret_rf, newdata = task$X)) rmse_classic <- sqrt(mean((prd_caret_rf - task_binaryY$Y)^2)) - + expect_equal(rmse_sl3, rmse_classic, tolerance = 0.1) }) diff --git a/tests/testthat/test-character_covariates.R b/tests/testthat/test-character_covariates.R index f64f1509..b4e1914f 100644 --- a/tests/testthat/test-character_covariates.R +++ b/tests/testthat/test-character_covariates.R @@ -5,11 +5,11 @@ test_that("character covariates are cast to factors", { covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") outcome <- "haz" cpp_imputed$sexn <- as.character(cpp_imputed$sexn) - + expect_warning( task_character_to_factor <- make_sl3_Task(cpp_imputed, covars, outcome), "Character variables found: sexn;\nConverting these to factors" ) - + expect_equal(class(task_character_to_factor$get_node("covariates")$sexn), "factor") }) diff --git a/tests/testthat/test-cv_sl.R b/tests/testthat/test-cv_sl.R index f23495f2..4f6ff5a6 100644 --- a/tests/testthat/test-cv_sl.R +++ b/tests/testthat/test-cv_sl.R @@ -9,9 +9,9 @@ test_that("cross-validated super learner works", { covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") outcome <- "haz" task <- sl3_Task$new(data.table::copy(cpp_imputed), - covariates = covars, - outcome = outcome, - folds = origami::make_folds(cpp_imputed, V = 3) + covariates = 
covars, + outcome = outcome, + folds = origami::make_folds(cpp_imputed, V = 3) ) glm_learner <- Lrnr_glm$new() glmnet_learner <- Lrnr_pkg_SuperLearner$new("SL.glmnet") @@ -20,6 +20,6 @@ test_that("cross-validated super learner works", { sl <- make_learner(Lrnr_sl, learners, glm_learner) sl_fit <- sl$train(task) cv_sl_fit <- cv_sl(sl_fit, task, loss_squared_error) - + expect_false(any(is.na(cv_sl_fit))) }) diff --git a/tests/testthat/test-dbarts.R b/tests/testthat/test-dbarts.R index c10197ff..ccd33cac 100644 --- a/tests/testthat/test-dbarts.R +++ b/tests/testthat/test-dbarts.R @@ -36,7 +36,7 @@ test_that("Lrnr_dbarts produces results matching those of dbarts::barts", { # get predictions from classic implementation set.seed(123) fit_classic <- dbarts::bart( - x.train = data.frame(task$X), y.train = task$Y, keeptrees = TRUE, + x.train = data.frame(task$X), y.train = task$Y, keeptrees = TRUE, ndpost = 500, verbose = FALSE ) diff --git a/tests/testthat/test-delayed_sl3.R b/tests/testthat/test-delayed_sl3.R index 84c5baeb..dfd7fb2a 100644 --- a/tests/testthat/test-delayed_sl3.R +++ b/tests/testthat/test-delayed_sl3.R @@ -5,9 +5,10 @@ plan(sequential) data(cpp_imputed) task <- sl3_Task$new( - cpp_imputed, + cpp_imputed, covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"), - outcome = "haz") + outcome = "haz" +) lrnr_rf <- Lrnr_randomForest$new() @@ -17,7 +18,7 @@ test_that("FutureJob using delayed is reproducible", { sched <- Scheduler$new(test_delayed, FutureJob) fit_delayed <- sched$compute() preds_delayed <- fit_delayed$predict() - + set.seed(123) options(sl3.enable.future = TRUE) # the default fit <- lrnr_rf$train(task) @@ -31,7 +32,7 @@ test_that("SequentialJob is reproducible", { sched <- Scheduler$new(test_delayed, SequentialJob) fit_delayed <- sched$compute() preds_delayed <- fit_delayed$predict() - + set.seed(123) options(sl3.enable.future = FALSE) fit <- lrnr_rf$train(task) diff --git 
a/tests/testthat/test-density-pooled_hazards.R b/tests/testthat/test-density-pooled_hazards.R index 0413950f..d135b97a 100644 --- a/tests/testthat/test-density-pooled_hazards.R +++ b/tests/testthat/test-density-pooled_hazards.R @@ -9,7 +9,7 @@ test_that("Negative log likelihood for pooled_hazards and haldensify match", { y <- 3 * x + epsilon data <- data.table(x = x, y = y) task <- sl3_Task$new(data, covariates = c("x"), outcome = "y") - + # instantiate learners hal <- Lrnr_hal9001$new( lambda = exp(seq(-1, -13, length = 100)), @@ -30,21 +30,20 @@ test_that("Negative log likelihood for pooled_hazards and haldensify match", { type = "equal_range", n_bins = 5 ) - + # fit discrete density model to pooled hazards data set.seed(74294) fit_density <- density_learner$train(task) pred_density <- fit_density$predict() - + # fit haldensify for comparison set.seed(74294) fit_haldensify <- haldensify$train(task) pred_haldensify <- fit_haldensify$predict() - + # compare density estimates true_density <- dnorm(x = y, mean = 3 * x) nll_ph <- sum(-1 * true_density * log(pred_density)) nll_haldensify <- sum(-1 * true_density * log(pred_haldensify)) expect_equal(nll_ph, nll_haldensify, scale = nll_ph, tol = 2) - }) diff --git a/tests/testthat/test-density-semiparametric.R b/tests/testthat/test-density-semiparametric.R index f0f373da..de81182d 100644 --- a/tests/testthat/test-density-semiparametric.R +++ b/tests/testthat/test-density-semiparametric.R @@ -2,54 +2,54 @@ context("test-density-semiparametric.R -- Lrnr_density_semiparametric") test_that("Lrnr_density_semiparametric works", { set.seed(1234) - + # define test dataset n <- 1e6 x <- runif(n, 0, 3) epsilon_x <- rnorm(n, 0, 0.5 + sqrt(x)) # epsilon_x <- rnorm(n) y <- 3 * x + epsilon_x - + data <- data.table(x = x, x2 = x^2, y = y) covariates <- c("x") task <- make_sl3_Task(data, covariates = covariates, outcome = "y") - + # train hse_learner <- make_learner(Lrnr_density_semiparametric, - mean_learner = make_learner(Lrnr_glm) + 
mean_learner = make_learner(Lrnr_glm) ) - + mvd_learner <- make_learner(Lrnr_density_semiparametric, - mean_learner = make_learner(Lrnr_glm), - var_learner = make_learner(Lrnr_glm) + mean_learner = make_learner(Lrnr_glm), + var_learner = make_learner(Lrnr_glm) ) - + hse_fit <- hse_learner$train(task) mvd_fit <- mvd_learner$train(task) - + # test sampling y_samp <- mvd_fit$sample(task[1:10], 100) - + x_grid <- seq(from = min(data$x), to = max(data$x), length = 100) y_grid <- seq(from = min(data$y), to = 1.5 * max(data$y), length = 100) pred_data <- as.data.table(expand.grid(x = x_grid, y = y_grid)) pred_data$x2 <- pred_data$x^2 pred_task <- make_sl3_Task(pred_data, covariates = covariates, outcome = "y") - + pred_data$hse_preds <- hse_fit$predict(pred_task) pred_data$mvd_preds <- mvd_fit$predict(pred_task) pred_data[, true_dens := dnorm(x = y, mean = 3 * x, sd = abs(x))] - + nll <- function(observed, pred) { res <- -1 * observed * log(pred) res[observed < .Machine$double.eps] <- 0 - + return(res) } - + hse_nll <- sum(nll(pred_data$true_dens, pred_data$hse_preds)) mvd_nll <- sum(nll(pred_data$true_dens, pred_data$mvd_preds)) - + expect_lt(hse_nll, n) expect_lt(mvd_nll, hse_nll) # long <- melt(pred_data, id = c("x", "y", "true_dens"), measure = c("hse_preds", "mvd_preds", "true_dens")) diff --git a/tests/testthat/test-density_hse.R b/tests/testthat/test-density_hse.R index 69e857a8..e1211772 100644 --- a/tests/testthat/test-density_hse.R +++ b/tests/testthat/test-density_hse.R @@ -3,24 +3,25 @@ context("test-density_hse.R -- Lrnr_density_hse") test_that("density_hse produces same results as density estimates from glm", { data(cpp_imputed) task <- sl3_Task$new( - cpp_imputed, - covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), - outcome = "haz") - + cpp_imputed, + covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs"), + outcome = "haz" + ) + lrnr_density_hse <- Lrnr_density_hse$new(mean_learner = Lrnr_glm$new()) 
lrnr_glm <- Lrnr_glm$new() - + fit_density_hse <- lrnr_density_hse$train(task) fit_glm <- lrnr_glm$train(task) - + # density hse preds_density_hse <- fit_density_hse$predict() - + # density from glm mean_preds <- fit_glm$predict() errors <- task$Y - mean_preds dens_fit <- density(errors) preds_glm_density <- approx(dens_fit$x, dens_fit$y, errors, rule = 2)$y - + expect_equal(preds_density_hse, preds_glm_density) }) diff --git a/tests/testthat/test-density_utils.R b/tests/testthat/test-density_utils.R index 592196ce..0c9e4f4b 100644 --- a/tests/testthat/test-density_utils.R +++ b/tests/testthat/test-density_utils.R @@ -4,11 +4,11 @@ test_that("make_bins() works with equal range binning", { x <- c(0, 1, 9) bins <- make_bins(x, type = "equal_range", n_bins = 2) expect_equal(bins, c(0, 4.5, 9)) - + x <- c(-10, 1, 2, 3, 10) bins <- make_bins(x, type = "equal_range", n_bins = 5) expect_equal(bins, c(-10, -6, -2, 2, 6, 10)) - + x <- c(-10, 1, 2, 3, 9) bins <- make_bins(x, type = "equal_range", n_bins = 5) expect_equal(bins, c(-10, -6.2, -2.4, 1.4, 5.2, 9.0)) @@ -18,11 +18,11 @@ test_that("make_bins() works with equal mass binning", { x <- c(0, 1, 9) bins <- make_bins(x, type = "equal_mass", n_bins = 2) expect_equal(bins, c(0, 1, 9)) - + x <- c(-10, 0, 0, 10) bins <- make_bins(x, type = "equal_mass", n_bins = 2) expect_equal(bins, c(-10, 0, 10)) - + x <- c(-10, -2, -2, 1, 1, 1, 10) bins <- make_bins(x, type = "equal_mass", n_bins = 3) expect_equal(bins, c(-10, -2, 1, 10)) @@ -30,7 +30,7 @@ test_that("make_bins() works with equal mass binning", { test_that("discretize_variable() works with equal range binning", { x <- 1:10 - + expected_res <- list( x_discrete = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2), x_in_bin = c(0, 1, 2, 3, 4, 0.5, 1.5, 2.5, 3.5, 4.5), @@ -43,13 +43,13 @@ test_that("discretize_variable() works with equal range binning", { test_that("discretize_variable() works with equal mass binning", { x <- 1:10 - + expected_res <- list( x_discrete = c(1, 1, 1, 2, 2, 2, 3, 3, 
3, 3), x_in_bin = c(0, 1, 2, 0, 1, 2, 0, 1, 2, 3), breaks = c(1, 4, 7, 10) ) - + res <- discretize_variable(x, type = "equal_mass", n_bins = 3) expect_equal(res, expected_res) }) diff --git a/tests/testthat/test-ga.R b/tests/testthat/test-ga.R index 80587e8e..178518d7 100644 --- a/tests/testthat/test-ga.R +++ b/tests/testthat/test-ga.R @@ -4,17 +4,17 @@ test_that("GA works as a metalearner", { data(cpp_imputed) covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs") task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz") - + lasso_lrnr <- Lrnr_glmnet$new() glm_lrnr <- Lrnr_glm$new() ranger_lrnr <- Lrnr_ranger$new() lrnrs <- c(lasso_lrnr, glm_lrnr, ranger_lrnr) names(lrnrs) <- c("lasso", "glm", "ranger") lrnr_stack <- make_learner(Stack, lrnrs) - + ga <- Lrnr_ga$new() sl <- Lrnr_sl$new(lrnr_stack, ga) sl_fit <- sl$train(task) - + expect_equal(sum(sl_fit$coefficients), 1) }) diff --git a/tests/testthat/test-lightgbm.R b/tests/testthat/test-lightgbm.R index a6254593..98848ea8 100644 --- a/tests/testthat/test-lightgbm.R +++ b/tests/testthat/test-lightgbm.R @@ -122,7 +122,8 @@ test_that("Lrnr_lightgbm predictions match lightgbm's: categorical outcome", { set.seed(73964) lrnr_lightgbm <- Lrnr_lightgbm$new( num_leaves = 40L, verbose = -1, - num_class = as.integer(length(unique(task$Y)))) + num_class = as.integer(length(unique(task$Y))) + ) fit_lrnr_lightgbm <- lrnr_lightgbm$train(task) prd_lrnr_lightgbm <- unpack_predictions(fit_lrnr_lightgbm$predict()) diff --git a/tests/testthat/test-mean.R b/tests/testthat/test-mean.R index 5375935f..c40f4c0f 100644 --- a/tests/testthat/test-mean.R +++ b/tests/testthat/test-mean.R @@ -4,10 +4,10 @@ test_that("Lrnr_mean predictions are the same as simple means", { data(cpp_imputed) covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs") task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz") - + lrnr_mean <- make_learner(Lrnr_mean) mean_fit <- lrnr_mean$train(task) mean_preds 
<- mean_fit$predict() - + expect_equal(mean_preds, rep(mean(task$Y), nrow(cpp_imputed))) }) diff --git a/tests/testthat/test-nnet.R b/tests/testthat/test-nnet.R index e30487f3..e22ce5ec 100644 --- a/tests/testthat/test-nnet.R +++ b/tests/testthat/test-nnet.R @@ -8,8 +8,10 @@ outcome <- "haz" task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = outcome) test_that("Lrnr_nnet gives the correct output for non-binomial outcome", { - lrnr_nnet <- Lrnr_nnet$new(linout = TRUE, size = 10, maxit = 1000, - trace = FALSE) + lrnr_nnet <- Lrnr_nnet$new( + linout = TRUE, size = 10, maxit = 1000, + trace = FALSE + ) fit <- lrnr_nnet$train(task) preds <- fit$predict(task) expect_equal(task$nrow, length(preds)) @@ -19,8 +21,10 @@ test_that("Lrnr_nnet gives the correct output for non-binomial outcome", { test_that("Lrnr_nnet gives the correct output for binomial outcome", { covars <- c("bmi", "parity", "mage") task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = "sexn") - lrnr_nnet <- Lrnr_nnet$new(linout = TRUE, size = 10, maxit = 1000, - trace = FALSE) + lrnr_nnet <- Lrnr_nnet$new( + linout = TRUE, size = 10, maxit = 1000, + trace = FALSE + ) fit <- lrnr_nnet$train(task) preds <- fit$predict(task) expect_equal(task$nrow, length(preds)) diff --git a/tests/testthat/test-offset.R b/tests/testthat/test-offset.R index f4761234..e805a679 100644 --- a/tests/testthat/test-offset.R +++ b/tests/testthat/test-offset.R @@ -48,17 +48,17 @@ newoffset_task <- sl3_Task$new( # specifically test lrnr_glm against base glm test_that("Lrnr_glm_fast produces same results as base glm with offset", { lrnr_glm <- make_learner(Lrnr_glm_fast) - + fit <- lrnr_glm$train(task) offset_fit <- lrnr_glm$train(offset_task) - + preds <- fit$predict() offset_preds <- offset_fit$predict() expect_false(isTRUE(all.equal(preds, offset_preds))) - + glm_fit <- glm(A ~ W1 + W2 + W3 + W4, data, family = binomial()) expect_equivalent(coef(glm_fit), coef(fit)) - + glm_offset_fit <- glm( A ~ W1 + W2 + W3 
+ W4, data, family = binomial(), @@ -106,7 +106,7 @@ test_that("Offset works for learners that support it", { "Lrnr_glm", "Lrnr_glm_fast", "Lrnr_mean", "Lrnr_xgboost" ) - + offset_learners <- offset_learner_stack$params$learners lapply( offset_learners, test_learner_offset_support, diff --git a/tests/testthat/test-pipeline.R b/tests/testthat/test-pipeline.R index a52b324a..dffa13f4 100644 --- a/tests/testthat/test-pipeline.R +++ b/tests/testthat/test-pipeline.R @@ -15,13 +15,17 @@ test_screen_pipe <- function(screen_name_SuperLearner) { screen_learner <- Lrnr_pkg_SuperLearner_screener$new(screen_name_SuperLearner) screen_glm <- make_learner(Pipeline, screen_learner, glm_learner) fit <- screen_glm$train(task) - - expect_equal(fit$fit_object$learner_fits[[1]]$fit_object$selected, - names(fit$fit_object$learner_fits$Lrnr_glm_TRUE$coefficients)[-1]) + + expect_equal( + fit$fit_object$learner_fits[[1]]$fit_object$selected, + names(fit$fit_object$learner_fits$Lrnr_glm_TRUE$coefficients)[-1] + ) } test_that("Pipeline pipes selected covariates from screening algorithms", { - screens <- c("screen.glmnet", "screen.corP", "screen.corRank", - "screen.randomForest", "screen.SIS") + screens <- c( + "screen.glmnet", "screen.corP", "screen.corRank", + "screen.randomForest", "screen.SIS" + ) lapply(screens, test_screen_pipe) }) diff --git a/tests/testthat/test-randomForest.R b/tests/testthat/test-randomForest.R index e71a988c..eaaf2ab5 100644 --- a/tests/testthat/test-randomForest.R +++ b/tests/testthat/test-randomForest.R @@ -4,21 +4,23 @@ test_that("Lrnr_randomForest predictions are the same as original package", { data(cpp_imputed) covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs") task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz") - + lrnr_rf <- make_learner(Lrnr_randomForest) set.seed(123) lrnr_rf_fit <- lrnr_rf$train(task) sl3_preds <- as.numeric(lrnr_rf_fit$predict()) sl3_mse <- mean((task$Y - sl3_preds)^2) - + set.seed(123) - rf_fit 
<- randomForest(x = task$X, y = task$Y, - ntree = lrnr_rf$params$ntree, - keep.forest = lrnr_rf$params$keep.forest, - nodesize = lrnr_rf$params$keep.forest, - mtry = floor(ncol(task$X))) + rf_fit <- randomForest( + x = task$X, y = task$Y, + ntree = lrnr_rf$params$ntree, + keep.forest = lrnr_rf$params$keep.forest, + nodesize = lrnr_rf$params$keep.forest, + mtry = floor(ncol(task$X)) + ) rf_preds <- as.numeric(predict(rf_fit, task$data)) classic_mse <- mean((task$Y - rf_preds)^2) - + expect_equal(sl3_mse, classic_mse, tolerance = 0.05) }) diff --git a/tests/testthat/test-ranger.R b/tests/testthat/test-ranger.R index 485fa1b8..dd80d224 100644 --- a/tests/testthat/test-ranger.R +++ b/tests/testthat/test-ranger.R @@ -62,13 +62,14 @@ test_that("Lrnr_ranger predictions match those from ranger", { prd_lrnr_ranger <- fit_lrnr_ranger$predict() ## fit ranger using the data from the task - fit_ranger <- ranger(mpg ~ ., - num.trees = lrnr_ranger$params$num.trees, - write.forest = lrnr_ranger$params$write.forest, - importance = lrnr_ranger$params$importance, - num.threads = lrnr_ranger$params$num.threads, - seed = 123, - data = task$data) + fit_ranger <- ranger(mpg ~ ., + num.trees = lrnr_ranger$params$num.trees, + write.forest = lrnr_ranger$params$write.forest, + importance = lrnr_ranger$params$importance, + num.threads = lrnr_ranger$params$num.threads, + seed = 123, + data = task$data + ) prd_ranger <- predict(fit_ranger, data = task$data)[[1]] ## test equivalence of prediction from Lrnr_ranger and ranger::ranger diff --git a/tests/testthat/test-rugarch.R b/tests/testthat/test-rugarch.R index 0fd9dbde..b597aa24 100644 --- a/tests/testthat/test-rugarch.R +++ b/tests/testthat/test-rugarch.R @@ -4,13 +4,13 @@ library(rugarch) test_that("Lrnr_rugarch predictions are the same as classic implementation", { data(bsds) - + # make folds appropriate for time-series cross-validation folds <- make_folds(bsds, - fold_fun = folds_rolling_window, window_size = 500, - validation_size = 100, 
gap = 0, batch = 50 + fold_fun = folds_rolling_window, window_size = 500, + validation_size = 100, gap = 0, batch = 50 ) - + # build task by passing in external folds structure task <- sl3_Task$new( data = bsds, @@ -20,22 +20,26 @@ test_that("Lrnr_rugarch predictions are the same as classic implementation", { ), outcome = "cnt" ) - + # create tasks for training and validation train_task <- training(task, fold = task$folds[[1]]) valid_task <- validation(task, fold = task$folds[[1]]) - + lrnr_rugarch <- Lrnr_rugarch$new() - + # sl3 implementation fit_sl3 <- lrnr_rugarch$train(train_task) pred_sl3 <- fit_sl3$predict(valid_task) - + # classical implementation fit_classic <- rugarch::ugarchfit(ugarchspec(), train_task$X) - pred_classic <- ugarchforecast(fit_classic, data = valid_task$X, - n.ahead = ts_get_pred_horizon(train_task, - valid_task)) + pred_classic <- ugarchforecast(fit_classic, + data = valid_task$X, + n.ahead = ts_get_pred_horizon( + train_task, + valid_task + ) + ) pred_classic <- as.numeric(pred_classic@forecast$seriesFor) expect_equal(pred_sl3, pred_classic) }) diff --git a/tests/testthat/test-sl-timeseries.R b/tests/testthat/test-sl-timeseries.R index bc5bd9bf..1469da28 100644 --- a/tests/testthat/test-sl-timeseries.R +++ b/tests/testthat/test-sl-timeseries.R @@ -12,35 +12,32 @@ test_that("validation set for time-series is as expected", { covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") outcome <- "haz" data <- cpp_imputed[1:100, ] - + folds <- origami::make_folds(data, - fold_fun = folds_rolling_window, window_size = 25, - validation_size = 25, gap = 0, batch = 10 + fold_fun = folds_rolling_window, window_size = 25, + validation_size = 25, gap = 0, batch = 10 ) task <- make_sl3_Task( data = data, covariates = covars, outcome = outcome, folds = folds ) - + lrnr_glm <- make_learner(Lrnr_glm) lrnr_mean <- make_learner(Lrnr_mean) lrnr_glmnet <- make_learner(Lrnr_glmnet) - + stack <- make_learner( Stack, lrnr_glm, lrnr_mean, 
lrnr_glmnet ) metalearner <- make_learner(Lrnr_nnls) - + sl <- make_learner(Lrnr_sl, - learners = stack, - metalearner = metalearner + learners = stack, + metalearner = metalearner ) sl_fit <- sl$train(task) preds <- sl_fit$predict_fold(task, "validation") expect_equal(length(preds), 150) }) - - - diff --git a/tests/testthat/test-sl_fold.R b/tests/testthat/test-sl_fold.R index 5e15814d..29adaa03 100644 --- a/tests/testthat/test-sl_fold.R +++ b/tests/testthat/test-sl_fold.R @@ -10,20 +10,20 @@ test_that("sl prediction for each fold works", { covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn") outcome <- "haz" task <- sl3_Task$new(data.table::copy(cpp_imputed), covariates = covars, outcome = outcome) - + glm_learner <- Lrnr_glm$new() glmnet_learner <- Lrnr_pkg_SuperLearner$new("SL.glmnet") subset_apgar <- Lrnr_subset_covariates$new(covariates = c("apgar1", "apgar5")) learners <- list(glm_learner, glmnet_learner, subset_apgar) sl1 <- make_learner(Lrnr_sl, learners, glm_learner) - + sl_fit <- sl1$train(task) - + fold1_predict <- sl_fit$predict_fold(task, 1) validation_predict <- sl_fit$predict_fold(task, "validation") expect_false(all(fold1_predict == validation_predict)) expect_true(any(fold1_predict == validation_predict)) - + glm_fit <- glm_learner$train(task) expect_warning(glm_fold1_predict <- glm_fit$predict_fold(task, 1)) })