# Tests whether `x` is, or extends, the S4 class "sparseMatrix".
is_sparse_matrix <- function(x) {
  methods::is(x, "sparseMatrix")
}

#' Toggle all auto sparse arguments
#'
#' @param x A recipe.
#' @param choice A character string giving the value that replaces `"auto"`
#'   in the `sparse` argument of each affected step, e.g. `"yes"` or `"no"`.
#'
#' @details
#' If a step has an argument `sparse = "auto"`, then workflows can use this
#' function to fill these values with the preferred action. This preferred
#' action is calculated in workflows dependent on the model and data
#' heuristics, which is why it has to be passed in.
#'
#' Only arguments where `sparse = "auto"` are affected, thus a user can set
#' `sparse = "no"` and it will be respected. Steps without a `sparse` argument
#' are returned unchanged.
#'
#' @return A recipe
#'
#' @keywords internal
#'
#' @export
.recipes_toggle_sparse_args <- function(x, choice) {
  for (i in seq_along(x$steps)) {
    # `[[` matches the name exactly; `$` would partially match on a list and
    # could pick up a differently named argument that merely starts with
    # "sparse".
    current <- x$steps[[i]][["sparse"]]

    # `identical()` is a safe scalar test: it is simply FALSE for NULL, NA or
    # non-scalar values, where `== "auto"` inside `if` would raise an error.
    if (identical(current, "auto")) {
      x$steps[[i]][["sparse"]] <- choice
    }
  }
  x
}
# Checks that `.recipes_toggle_sparse_args()` only rewrites steps whose
# `sparse` argument is still "auto" and returns every other recipe unchanged.
test_that(".recipes_toggle_sparse_args works", {
  skip_if_not_installed("modeldata")
  data("ames", package = "modeldata")

  # A recipe whose steps are given no `sparse` argument must come back as is.
  plain_rec <- recipe(Sale_Price ~ ., data = ames) |>
    step_center(all_numeric_predictors()) |>
    step_center(all_numeric_predictors())

  expect_identical(
    .recipes_toggle_sparse_args(plain_rec, "yes"),
    plain_rec
  )

  # Fixtures are named `<first>_<second>` after the `sparse` setting of the
  # first and second `step_dummy()`. "auto" means the argument is omitted; the
  # expectations below rely on it then being treated as "auto".
  # NOTE(review): `id = ""` presumably pins the step ids so that separately
  # built recipes can compare identical -- confirm.
  fixtures <- list(
    yes_yes = recipe(Sale_Price ~ ., data = ames) |>
      step_dummy(MS_Zoning, Street, sparse = "yes", id = "") |>
      step_center(all_numeric_predictors(), id = "") |>
      step_dummy(all_nominal_predictors(), sparse = "yes", id = "") |>
      step_center(all_numeric_predictors(), id = ""),
    no_no = recipe(Sale_Price ~ ., data = ames) |>
      step_dummy(MS_Zoning, Street, sparse = "no", id = "") |>
      step_center(all_numeric_predictors(), id = "") |>
      step_dummy(all_nominal_predictors(), sparse = "no", id = "") |>
      step_center(all_numeric_predictors(), id = ""),
    yes_no = recipe(Sale_Price ~ ., data = ames) |>
      step_dummy(MS_Zoning, Street, sparse = "yes", id = "") |>
      step_center(all_numeric_predictors(), id = "") |>
      step_dummy(all_nominal_predictors(), sparse = "no", id = "") |>
      step_center(all_numeric_predictors(), id = ""),
    no_yes = recipe(Sale_Price ~ ., data = ames) |>
      step_dummy(MS_Zoning, Street, sparse = "no", id = "") |>
      step_center(all_numeric_predictors(), id = "") |>
      step_dummy(all_nominal_predictors(), sparse = "yes", id = "") |>
      step_center(all_numeric_predictors(), id = ""),
    auto_yes = recipe(Sale_Price ~ ., data = ames) |>
      step_dummy(MS_Zoning, Street, id = "") |>
      step_center(all_numeric_predictors(), id = "") |>
      step_dummy(all_nominal_predictors(), sparse = "yes", id = "") |>
      step_center(all_numeric_predictors(), id = ""),
    auto_no = recipe(Sale_Price ~ ., data = ames) |>
      step_dummy(MS_Zoning, Street, id = "") |>
      step_center(all_numeric_predictors(), id = "") |>
      step_dummy(all_nominal_predictors(), sparse = "no", id = "") |>
      step_center(all_numeric_predictors(), id = ""),
    auto_auto = recipe(Sale_Price ~ ., data = ames) |>
      step_dummy(MS_Zoning, Street, id = "") |>
      step_center(all_numeric_predictors(), id = "") |>
      step_dummy(all_nominal_predictors(), id = "") |>
      step_center(all_numeric_predictors(), id = "")
  )

  # One entry per expectation: the starting fixture, the `choice` passed in,
  # and the fixture the toggled recipe must be identical to.
  scenarios <- list(
    c(start = "yes_yes", choice = "yes", result = "yes_yes"),
    c(start = "yes_yes", choice = "no", result = "yes_yes"),
    c(start = "no_no", choice = "yes", result = "no_no"),
    c(start = "no_no", choice = "no", result = "no_no"),
    c(start = "auto_auto", choice = "yes", result = "yes_yes"),
    c(start = "auto_auto", choice = "no", result = "no_no"),
    c(start = "auto_yes", choice = "yes", result = "yes_yes"),
    c(start = "auto_yes", choice = "no", result = "no_yes"),
    c(start = "auto_no", choice = "yes", result = "yes_no"),
    c(start = "auto_no", choice = "no", result = "no_no")
  )

  for (scenario in scenarios) {
    expect_identical(
      .recipes_toggle_sparse_args(
        fixtures[[scenario[["start"]]]],
        scenario[["choice"]]
      ),
      fixtures[[scenario[["result"]]]],
      # only shown on failure, to identify the failing scenario
      info = paste0(
        "start: ", scenario[["start"]], ", choice: ", scenario[["choice"]]
      )
    )
  }
})