From f58f4fb9820ba78da14152a923c130bbb80843ef Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Fri, 10 Jan 2025 16:10:24 -0800
Subject: [PATCH] change sparse argument of step_dummy() to be an enum

---
 NEWS.md                        |  2 +-
 R/dummy.R                      | 13 +++++++------
 man/step_dummy.Rd              |  9 +++++----
 tests/testthat/_snaps/dummy.md |  4 ++--
 tests/testthat/test-dummy.R    | 10 +++++-----
 5 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 5f1c02ce1..ac03ae4f8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -10,7 +10,7 @@
 
 * All steps and checks now require arguments `trained`, `skip`, `role`, and `id` at all times.
 
-* `step_dummy()` gained `sparse` argument. When set to `TRUE`, `step_dummy()` will produce sparse vectors. (#1392)
+* `step_dummy()` gained a `sparse` argument. When set to `"yes"`, `step_dummy()` will produce sparse vectors. (#1392)
 
 # recipes 1.1.0
 
diff --git a/R/dummy.R b/R/dummy.R
index 19e85ea8b..9ef8c0911 100644
--- a/R/dummy.R
+++ b/R/dummy.R
@@ -18,9 +18,10 @@
 #' @param levels A list that contains the information needed to create dummy
 #'   variables for each variable contained in `terms`. This is `NULL` until the
 #'   step is trained by [prep()].
-#' @param sparse A logical. Should the columns produced be sparse vectors.
-#'   Sparsity is only supported for `"contr.treatment"` contrasts. Defaults to 
-#'   `FALSE`.
+#' @param sparse A single string. Should the columns produced be sparse vectors?
+#'   Can take the values `"yes"`, `"no"`, and `"auto"`. If `sparse = "auto"`
+#'   then workflows can determine the best option. Sparsity is only supported
+#'   for `"contr.treatment"` contrasts. Defaults to `"auto"`.
 #' @template step-return
 #' @family dummy variable and encoding steps
 #' @seealso [dummy_names()]
@@ -125,7 +126,7 @@ step_dummy <-
            preserve = deprecated(),
            naming = dummy_names,
            levels = NULL,
-           sparse = FALSE,
+           sparse = "auto",
            keep_original_cols = FALSE,
            skip = FALSE,
            id = rand_id("dummy")) {
@@ -181,7 +182,7 @@ prep.step_dummy <- function(x, training, info = NULL, ...) {
   check_type(training[, col_names], types = c("factor", "ordered"))
   check_bool(x$one_hot, arg = "one_hot")
   check_function(x$naming, arg = "naming", allow_empty = FALSE)
-  check_bool(x$sparse, arg = "sparse")
+  rlang::arg_match0(x$sparse, c("auto", "yes", "no"), arg_nm = "sparse")
 
   if (length(col_names) > 0) {
     ## I hate doing this but currently we are going to have
@@ -301,7 +302,7 @@ bake.step_dummy <- function(object, new_data, ...) {
       ordered = is_ordered
     )
 
-    if (object$sparse) {
+    if (object$sparse == "yes") {
       current_contrast <- getOption("contrasts")[is_ordered + 1]
       if (current_contrast != "contr.treatment") {
         cli::cli_abort(
diff --git a/man/step_dummy.Rd b/man/step_dummy.Rd
index 1a6b91560..308622adc 100644
--- a/man/step_dummy.Rd
+++ b/man/step_dummy.Rd
@@ -13,7 +13,7 @@ step_dummy(
   preserve = deprecated(),
   naming = dummy_names,
   levels = NULL,
-  sparse = FALSE,
+  sparse = "auto",
   keep_original_cols = FALSE,
   skip = FALSE,
   id = rand_id("dummy")
@@ -47,9 +47,10 @@ columns. See Details below.}
 variables for each variable contained in \code{terms}. This is \code{NULL} until the
 step is trained by \code{\link[=prep]{prep()}}.}
 
-\item{sparse}{A logical. Should the columns produced be sparse vectors.
-Sparsity is only supported for \code{"contr.treatment"} contrasts. Defaults to
-\code{FALSE}.}
+\item{sparse}{A single string. Should the columns produced be sparse vectors?
+Can take the values \code{"yes"}, \code{"no"}, and \code{"auto"}. If \code{sparse = "auto"}
+then workflows can determine the best option. Sparsity is only supported
+for \code{"contr.treatment"} contrasts. Defaults to \code{"auto"}.}
 
 \item{keep_original_cols}{A logical to keep the original variables in the
 output. Defaults to \code{FALSE}.}
diff --git a/tests/testthat/_snaps/dummy.md b/tests/testthat/_snaps/dummy.md
index 2b9da790c..cc99c2702 100644
--- a/tests/testthat/_snaps/dummy.md
+++ b/tests/testthat/_snaps/dummy.md
@@ -154,10 +154,10 @@
       Caused by error in `bake()`:
       ! Only one factor level in `x`: "only-level".
 
-# sparse = TRUE errors on unsupported contrasts
+# sparse = 'yes' errors on unsupported contrasts
 
     Code
-      recipe(~., data = tibble(x = letters)) %>% step_dummy(x, sparse = TRUE) %>%
+      recipe(~., data = tibble(x = letters)) %>% step_dummy(x, sparse = "yes") %>%
         prep()
     Condition
       Error in `step_dummy()`:
diff --git a/tests/testthat/test-dummy.R b/tests/testthat/test-dummy.R
index 1d77ea4a8..b944ffb68 100644
--- a/tests/testthat/test-dummy.R
+++ b/tests/testthat/test-dummy.R
@@ -354,13 +354,13 @@ test_that("throws an informative error for single level", {
   )
 })
 
-test_that("sparse = TRUE works", {
+test_that("sparse = 'yes' works", {
   rec <- recipe(~ ., data = tibble(x = c(NA, letters)))
 
   suppressWarnings({
-    dense <- rec %>% step_dummy(x, sparse = FALSE) %>% prep() %>% bake(NULL)
+    dense <- rec %>% step_dummy(x, sparse = "no") %>% prep() %>% bake(NULL)
     dense <- purrr::map(dense, as.integer) %>% tibble::new_tibble()
-    sparse <- rec %>% step_dummy(x, sparse = TRUE) %>% prep() %>% bake(NULL)
+    sparse <- rec %>% step_dummy(x, sparse = "yes") %>% prep() %>% bake(NULL)
   })
 
   expect_identical(dense, sparse)
@@ -369,7 +369,7 @@ test_that("sparse = TRUE works", {
   expect_true(all(vapply(sparse, sparsevctrs::is_sparse_vector, logical(1))))
 })
 
-test_that("sparse = TRUE errors on unsupported contrasts", {
+test_that("sparse = 'yes' errors on unsupported contrasts", {
   go_helmert <- getOption("contrasts")
   go_helmert["unordered"] <- "contr.helmert"
   withr::local_options(contrasts = go_helmert)
@@ -377,7 +377,7 @@ test_that("sparse = TRUE errors on unsupported contrasts", {
   expect_snapshot(
     error = TRUE,
     recipe(~ ., data = tibble(x = letters)) %>% 
-      step_dummy(x, sparse = TRUE) %>% 
+      step_dummy(x, sparse = "yes") %>% 
       prep()
   )
 })