Merge pull request #1391 from tidymodels/type-checkers-part-2

Type checkers part 2
tidymodels · Nov 9, 2024 · ee360b0 · ee360b0
2 parents 28e3e4d + 1404d01
commit ee360b0
Show file tree

Hide file tree

Showing 107 changed files with 1,137 additions and 184 deletions.
diff --git a/R/YeoJohnson.R b/R/YeoJohnson.R
@@ -109,6 +109,14 @@ step_YeoJohnson_new <-
 prep.step_YeoJohnson <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_whole(x$num_unique, args = "num_unique")
+  check_bool(x$na_rm, arg = "na_rm")
+  if (!is.numeric(x$limits) || any(is.na(x$limits)) || length(x$limits) != 2) {
+    cli::cli_abort("{.arg limits} should be a numeric vector with two values,
+                    not {.obj_type_friendly {x$limits}}")
+  }
+
+  x$limits <- sort(x$limits)
 
   values <- vapply(
     training[, col_names],

diff --git a/R/lag.R b/R/lag.R
@@ -103,6 +103,7 @@ prep.step_lag <- function(x, training, info = NULL, ...) {
       not {.obj_type_friendly {lag}}."
     )
   }
+  check_string(x$prefix, arg = "prefix")
 
   step_lag_new(
     terms = x$terms,

diff --git a/R/lincomb.R b/R/lincomb.R
@@ -101,6 +101,7 @@ step_lincomb_new <-
 prep.step_lincomb <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_whole(x$max_steps, arg = "max_steps", min = 1)
 
   filter <- iter_lc_rm(
     x = training[, col_names],

diff --git a/R/log.R b/R/log.R
@@ -108,6 +108,9 @@ step_log_new <-
 prep.step_log <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_decimal(x$offset, arg = "offset")
+  check_bool(x$signed, arg = "signed")
+  check_number_decimal(x$base, arg = "base", min = 0)
 
   step_log_new(
     terms = x$terms,

diff --git a/R/logit.R b/R/logit.R
@@ -84,6 +84,7 @@ step_logit_new <-
 prep.step_logit <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_decimal(x$offset, arg = "offset")
 
   step_logit_new(
     terms = x$terms,

diff --git a/R/newvalues.R b/R/newvalues.R
@@ -116,6 +116,7 @@ new_values_func <- function(x,
 #' @export
 prep.check_new_values <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
+  check_bool(x$ignore_NA, arg = "ignore_NA")
 
   values <- lapply(training[, col_names], unique)
 

diff --git a/R/nnmf_sparse.R b/R/nnmf_sparse.R
@@ -154,6 +154,9 @@ nnmf_pen_call <- function(x) {
 prep.step_nnmf_sparse <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_whole(x$num_comp, arg = "num_comp", min = 0)
+  check_number_decimal(x$penalty, arg = "penalty", min = .Machine$double.eps)
+  check_string(x$prefix, arg = "prefix")
 
   if (x$num_comp > 0 && length(col_names) > 0) {
     x$num_comp <- min(x$num_comp, length(col_names))

diff --git a/R/normalize.R b/R/normalize.R
@@ -139,6 +139,7 @@ sd_check <- function(x) {
 prep.step_normalize <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_bool(x$na_rm, arg = "na_rm")
 
   wts <- get_case_weights(info, training)
   were_weights_used <- are_weights_used(wts, unsupervised = TRUE)

diff --git a/R/novel.R b/R/novel.R
@@ -121,6 +121,7 @@ get_existing_values <- function(x) {
 prep.step_novel <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("string", "factor", "ordered"))
+  check_string(x$new_level, arg = "new_level")
 
   # Get existing levels and their factor type (i.e. ordered)
   objects <- lapply(training[, col_names], get_existing_values)

diff --git a/R/num2factor.R b/R/num2factor.R
@@ -146,6 +146,7 @@ get_ord_lvls_num <- function(x, foo) {
 prep.step_num2factor <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_function(x$transform, arg = "transform")
 
   res <- lapply(training[, col_names], get_ord_lvls_num, foo = x$transform)
   res <- c(res, ..levels = list(x$levels))

diff --git a/R/nzv.R b/R/nzv.R
@@ -93,7 +93,7 @@ step_nzv <-
            skip = FALSE,
            id = rand_id("nzv")) {
     exp_list <- list(freq_cut = 95 / 5, unique_cut = 10)
-    
+
     if (!isTRUE(all.equal(exp_list, options))) {
       lifecycle::deprecate_stop(
         "0.1.7",
@@ -140,6 +140,8 @@ step_nzv_new <-
 #' @export
 prep.step_nzv <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
+  check_number_decimal(x$unique_cut, arg = "unique_cut", min = 0, max = 100)
+  check_number_decimal(x$freq_cut, arg = "freq_cut", min = 0)
 
   wts <- get_case_weights(info, training)
   were_weights_used <- are_weights_used(wts, unsupervised = TRUE)

diff --git a/R/ordinalscore.R b/R/ordinalscore.R
@@ -109,6 +109,7 @@ step_ordinalscore_new <-
 prep.step_ordinalscore <- function(x, training, info = NULL, ...) {
     col_names <- recipes_eval_select(x$terms, training, info)
     check_type(training[, col_names], types = "ordered")
+    check_function(x$convert, arg = "convert")
 
     step_ordinalscore_new(
       terms = x$terms,

diff --git a/R/other.R b/R/other.R
@@ -107,13 +107,7 @@ step_other <-
            objects = NULL,
            skip = FALSE,
            id = rand_id("other")) {
-    if (!is_tune(threshold)) {
-      if (threshold >= 1) {
-        check_number_whole(threshold)
-      } else {
-        check_number_decimal(threshold, min = 0)
-      }
-    }
+
     add_step(
       recipe,
       step_other_new(
@@ -152,6 +146,18 @@ prep.step_other <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("string", "factor", "ordered"))
 
+  if (!is.numeric(x$threshold)) {
+    cli::cli_abort("{.arg threshold} should be a single numeric value
+                   {.obj_type_friendly {x$threshold}}")
+  }
+
+  if (x$threshold >= 1) {
+    check_number_whole(x$threshold, arg = "threshold", min = 1)
+  } else {
+    check_number_decimal(x$threshold, arg = "threshold", min = 0)
+  }
+
+
   wts <- get_case_weights(info, training)
   were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
   if (isFALSE(were_weights_used)) {

diff --git a/R/pca.R b/R/pca.R
@@ -127,9 +127,6 @@ step_pca <- function(recipe,
                      keep_original_cols = FALSE,
                      skip = FALSE,
                      id = rand_id("pca")) {
-  if (!is_tune(threshold)) {
-    check_number_decimal(threshold, min = 0, max = 1, allow_na = TRUE)
-  }
 
   add_step(
     recipe,
@@ -176,6 +173,10 @@ step_pca_new <-
 prep.step_pca <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_decimal(x$threshold, arg = "threshold", min = 0, max = 1,
+                       allow_na = TRUE)
+  check_string(x$prefix, arg = "prefix")
+  check_number_whole(x$num_comp, arg = "num_comp", min = 0)
 
   wts <- get_case_weights(info, training)
   were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
@@ -352,8 +353,8 @@ tidy.step_pca <- function(x, type = "coef", ...) {
     )
   } else {
     type <- rlang::arg_match(
-      type, 
-      c("coef", "variance"), 
+      type,
+      c("coef", "variance"),
       error_call = rlang::caller_env()
     )
     if (type == "coef") {

diff --git a/R/pls.R b/R/pls.R
@@ -326,6 +326,9 @@ prep.step_pls <- function(x, training, info = NULL, ...) {
   y_names <- recipes_eval_select(x$outcome, training, info)
 
   check_type(training[, x_names], types = c("double", "integer"))
+  check_number_decimal(x$predictor_prop, arg = "predictor_prop", min = 0, max = 1)
+  check_string(x$prefix, arg = "prefix")
+  check_number_whole(x$num_comp, arg = "num_comp", min = 0)
 
   if (length(y_names) > 1 && any(!map_lgl(training[y_names], is.numeric))) {
     cli::cli_abort(

diff --git a/R/poly.R b/R/poly.R
@@ -73,14 +73,11 @@ step_poly <-
            role = "predictor",
            trained = FALSE,
            objects = NULL,
-           degree = 2,
+           degree = 2L,
            options = list(),
            keep_original_cols = FALSE,
            skip = FALSE,
            id = rand_id("poly")) {
-    if (!is_tune(degree)) {
-      degree <- as.integer(degree)
-    }
 
     if (any(names(options) == "degree")) {
       degree <- options$degree
@@ -142,6 +139,8 @@ poly_wrapper <- function(x, args) {
 prep.step_poly <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_whole(x$degree, arg = "degree", min = 1)
+  x$degree <- as.integer(x$degree)
 
   opts <- x$options
   opts$degree <- x$degree

diff --git a/R/poly_bernstein.R b/R/poly_bernstein.R
@@ -116,6 +116,8 @@ step_poly_bernstein_new <-
 prep.step_poly_bernstein <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_bool(x$complete_set, arg = "complete_set")
+  check_number_whole(x$degree, arg = "degree", min = 0)
 
   x$options <- c(x$options, degree = x$degree)
 

diff --git a/R/range.R b/R/range.R
@@ -4,10 +4,8 @@
 #' numeric data to be within a pre-defined range of values.
 #'
 #' @inheritParams step_center
-#' @param min A single numeric value for the smallest value in the
-#'  range.
-#' @param max A single numeric value for the largest value in the
-#'  range.
+#' @param min,max Single numeric values for the smallest (or largest) value in
+#' the transformed data.
 #' @param clipping A single logical value for determining whether
 #'  application of transformation onto new data should be forced
 #'  to be inside `min` and `max`. Defaults to TRUE.
@@ -106,6 +104,9 @@ step_range_new <-
 prep.step_range <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_number_decimal(x$min, arg = "min")
+  check_number_decimal(x$max, arg = "max")
+  check_bool(x$clipping, arg = "clipping")
 
   mins <-
     vapply(training[, col_names], min, c(min = 0), na.rm = TRUE)

diff --git a/R/ratio.R b/R/ratio.R
@@ -134,6 +134,7 @@ prep.step_ratio <- function(x, training, info = NULL, ...) {
     training[, unique(c(col_names$top, col_names$bottom))],
     types = c("double", "integer")
   )
+  check_function(x$naming, arg = "naming")
 
   step_ratio_new(
     terms = x$terms,

diff --git a/R/regex.R b/R/regex.R
@@ -63,9 +63,7 @@ step_regex <- function(recipe,
                        keep_original_cols = TRUE,
                        skip = FALSE,
                        id = rand_id("regex")) {
-  if (!is_tune(pattern)) {
-    check_string(pattern)
-  }
+
   valid_args <- names(formals(grepl))[-(1:2)]
   if (any(!(names(options) %in% valid_args))) {
     cli::cli_abort(c(
@@ -123,6 +121,7 @@ step_regex_new <-
 prep.step_regex <- function(x, training, info = NULL, ...) {
   col_name <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_name], types = c("string", "factor", "ordered"))
+  check_string(x$pattern, arg = "pattern", allow_empty = FALSE)
 
   step_regex_new(
     terms = x$terms,

diff --git a/R/relevel.R b/R/relevel.R
@@ -85,6 +85,7 @@ step_relevel_new <-
 prep.step_relevel <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("string", "factor"))
+  check_string(x$ref_level, arg = "ref_level", allow_empty = FALSE)
 
   # Get existing levels and their factor type (i.e. ordered)
   objects <- lapply(training[, col_names], get_existing_values)

diff --git a/R/relu.R b/R/relu.R
@@ -82,18 +82,6 @@ step_relu <-
            columns = NULL,
            skip = FALSE,
            id = rand_id("relu")) {
-    if (!is_tune(shift)) {
-      check_number_decimal(shift)
-    }
-    if (!is_tune(reverse)) {
-      check_bool(reverse)
-    }
-    if (!is_tune(smooth)) {
-      check_bool(smooth)
-    }
-    if (reverse & prefix == "right_relu_") {
-      prefix <- "left_relu_"
-    }
     add_step(
       recipe,
       step_relu_new(
@@ -132,6 +120,12 @@ step_relu_new <-
 prep.step_relu <- function(x, training, info = NULL, ...) {
   columns <- recipes_eval_select(x$terms, training, info)
   check_type(training[, columns], types = c("double", "integer"))
+  check_number_decimal(x$shift, arg = "shift")
+  check_bool(x$reverse, arg = "reverse")
+  check_bool(x$smooth, arg = "smooth")
+  if (x$reverse & x$prefix == "right_relu_") {
+    x$prefix <- "left_relu_"
+  }
 
   step_relu_new(
     terms = x$terms,

diff --git a/R/sample.R b/R/sample.R
@@ -69,18 +69,6 @@ step_sample <- function(recipe,
     cli::cli_warn("Selectors are not used for this step.")
   }
 
-  if (!is_tune(size)) {
-    check_number_decimal(size, min = 0, allow_null = TRUE)
-  }
-  if (!is_tune(replace)) {
-    if (!is.logical(replace)) {
-      cli::cli_abort(
-        "{.arg replace} should be a single logical, \\
-        not {.obj_type_friendly {replace}}."
-      )
-    }
-  }
-
   add_step(
     recipe,
     step_sample_new(
@@ -113,6 +101,9 @@ step_sample_new <-
 
 #' @export
 prep.step_sample <- function(x, training, info = NULL, ...) {
+
+  check_number_decimal(x$size, min = 0, allow_null = TRUE, arg = "size")
+  check_bool(x$replace, arg = "replace")
   if (is.null(x$size)) {
     x$size <- nrow(training)
   }

diff --git a/R/scale.R b/R/scale.R
@@ -107,19 +107,20 @@ step_scale_new <-
 prep.step_scale <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_bool(x$na_rm, arg = "na_rm")
+  if (x$factor != 1 & x$factor != 2) {
+    cli::cli_warn(
+      "Scaling {.arg factor} should take either a value of 1 or 2, not
+       {.obj_type_friendly {x$factor}}."
+    )
+  }
 
   wts <- get_case_weights(info, training)
   were_weights_used <- are_weights_used(wts, unsupervised = TRUE)
   if (isFALSE(were_weights_used)) {
     wts <- NULL
   }
 
-  if (x$factor != 1 & x$factor != 2) {
-    cli::cli_warn(
-      "Scaling {.arg factor} should take either a value of 1 or 2, \\
-      not {x$factor}."
-    )
-  }
 
   vars <- variances(training[, col_names], wts, na_rm = x$na_rm)
   sds <- sqrt(vars)

diff --git a/R/spatialsign.R b/R/spatialsign.R
@@ -116,6 +116,7 @@ step_spatialsign_new <-
 prep.step_spatialsign <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
+  check_bool(x$na_rm, arg = "na_rm")
 
   wts <- get_case_weights(info, training)
   were_weights_used <- are_weights_used(wts, unsupervised = TRUE)

diff --git a/R/spline_b.R b/R/spline_b.R
@@ -136,7 +136,10 @@ step_spline_b_new <-
 prep.step_spline_b <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
   check_type(training[, col_names], types = c("double", "integer"))
-
+  check_bool(x$complete_set, arg = "complete_set")
+  check_number_whole(x$degree, arg = "degree", min = 0)
+  check_number_whole(x$deg_free, arg = "deg_free", min = 0)
+
   res <- list()
 
   for (col_name in col_names) {