Skip to content

Commit

Permalink
change sparse argument of step_dummy() to be an enum
Browse files Browse the repository at this point in the history
  • Loading branch information
EmilHvitfeldt committed Jan 11, 2025
1 parent 76c651b commit f58f4fb
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 18 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

* All steps and checks now require arguments `trained`, `skip`, `role`, and `id` at all times.

* `step_dummy()` gained `sparse` argument. When set to `TRUE`, `step_dummy()` will produce sparse vectors. (#1392)
* `step_dummy()` gained a `sparse` argument. When set to `"yes"`, `step_dummy()` will produce sparse vectors. (#1392)

# recipes 1.1.0

Expand Down
13 changes: 7 additions & 6 deletions R/dummy.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
#' @param levels A list that contains the information needed to create dummy
#' variables for each variable contained in `terms`. This is `NULL` until the
#' step is trained by [prep()].
#' @param sparse A logical. Should the columns produced be sparse vectors.
#' Sparsity is only supported for `"contr.treatment"` contrasts. Defaults to
#' `FALSE`.
#' @param sparse A single string. Should the columns produced be sparse vectors?
#' Can take the values `"yes"`, `"no"`, and `"auto"`. If `sparse = "auto"`,
#' then workflows can determine the best option. Sparsity is only supported
#' for `"contr.treatment"` contrasts. Defaults to `"auto"`.
#' @template step-return
#' @family dummy variable and encoding steps
#' @seealso [dummy_names()]
Expand Down Expand Up @@ -125,7 +126,7 @@ step_dummy <-
preserve = deprecated(),
naming = dummy_names,
levels = NULL,
sparse = FALSE,
sparse = "auto",
keep_original_cols = FALSE,
skip = FALSE,
id = rand_id("dummy")) {
Expand Down Expand Up @@ -181,7 +182,7 @@ prep.step_dummy <- function(x, training, info = NULL, ...) {
check_type(training[, col_names], types = c("factor", "ordered"))
check_bool(x$one_hot, arg = "one_hot")
check_function(x$naming, arg = "naming", allow_empty = FALSE)
check_bool(x$sparse, arg = "sparse")
rlang::arg_match0(x$sparse, c("auto", "yes", "no"), arg_nm = "sparse")

if (length(col_names) > 0) {
## I hate doing this but currently we are going to have
Expand Down Expand Up @@ -301,7 +302,7 @@ bake.step_dummy <- function(object, new_data, ...) {
ordered = is_ordered
)

if (object$sparse) {
if (object$sparse == "yes") {
current_contrast <- getOption("contrasts")[is_ordered + 1]
if (current_contrast != "contr.treatment") {
cli::cli_abort(
Expand Down
9 changes: 5 additions & 4 deletions man/step_dummy.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/_snaps/dummy.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,10 @@
Caused by error in `bake()`:
! Only one factor level in `x`: "only-level".

# sparse = TRUE errors on unsupported contrasts
# sparse = 'yes' errors on unsupported contrasts

Code
recipe(~., data = tibble(x = letters)) %>% step_dummy(x, sparse = TRUE) %>%
recipe(~., data = tibble(x = letters)) %>% step_dummy(x, sparse = "yes") %>%
prep()
Condition
Error in `step_dummy()`:
Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test-dummy.R
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,13 @@ test_that("throws an informative error for single level", {
)
})

test_that("sparse = TRUE works", {
test_that("sparse = 'yes' works", {
rec <- recipe(~ ., data = tibble(x = c(NA, letters)))

suppressWarnings({
dense <- rec %>% step_dummy(x, sparse = FALSE) %>% prep() %>% bake(NULL)
dense <- rec %>% step_dummy(x, sparse = "no") %>% prep() %>% bake(NULL)
dense <- purrr::map(dense, as.integer) %>% tibble::new_tibble()
sparse <- rec %>% step_dummy(x, sparse = TRUE) %>% prep() %>% bake(NULL)
sparse <- rec %>% step_dummy(x, sparse = "yes") %>% prep() %>% bake(NULL)
})

expect_identical(dense, sparse)
Expand All @@ -369,15 +369,15 @@ test_that("sparse = TRUE works", {
expect_true(all(vapply(sparse, sparsevctrs::is_sparse_vector, logical(1))))
})

test_that("sparse = TRUE errors on unsupported contrasts", {
test_that("sparse = 'yes' errors on unsupported contrasts", {
go_helmert <- getOption("contrasts")
go_helmert["unordered"] <- "contr.helmert"
withr::local_options(contrasts = go_helmert)

expect_snapshot(
error = TRUE,
recipe(~ ., data = tibble(x = letters)) %>%
step_dummy(x, sparse = TRUE) %>%
step_dummy(x, sparse = "yes") %>%
prep()
)
})
Expand Down

0 comments on commit f58f4fb

Please sign in to comment.