Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use mutate(.keep = "none") in transmute() #483

Merged
merged 12 commits into from
Jan 24, 2025
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

* `print.dtplyr_step()` gains `n`, `max_extra_cols`, and `max_footer_lines` args (#464)

* `transmute()` preserves row count and avoids unnecessary copies (#470)

# dtplyr 1.3.1

* Fix for failing R CMD check.
Expand Down
38 changes: 1 addition & 37 deletions R/step-subset-transmute.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,5 @@
#' dt <- lazy_dt(dplyr::starwars)
#' dt %>% transmute(name, sh = paste0(species, "/", homeworld))
transmute.dtplyr_step <- function(.data, ...) {
dots <- capture_new_vars(.data, ...)
dots_list <- process_new_vars(.data, dots)
dots <- dots_list$dots

groups <- group_vars(.data)
if (!is_empty(groups)) {
# TODO could check if there is actually anything mutated, e.g. to avoid
# DT[, .(x = x)]
is_group_var <- names(dots) %in% groups
group_dots <- dots[is_group_var]

.data <- mutate(ungroup(.data), !!!group_dots)
.data <- group_by(.data, !!!syms(groups))

dots <- dots[!is_group_var]
}

if (is_empty(dots)) {
# grouping variables have been removed from `dots` so `select()` would
# produce a message "Adding grouping vars".
# As `dplyr::transmute()` doesn't generate a message when adding group vars
# we can also leave it away here
return(select(.data, !!!group_vars(.data)))
}

if (!dots_list$use_braces) {
j <- call2(".", !!!dots)
} else {
j <- mutate_with_braces(dots)$expr
}
vars <- union(group_vars(.data), names(dots))
out <- step_subset_j(.data, vars = vars, j = j)
if (dots_list$need_removal_step) {
out <- select(out, -tidyselect::all_of(dots_list$vars_removed))
}

out
mutate(.data, ..., .keep = "none")
markfairbanks marked this conversation as resolved.
Show resolved Hide resolved
}
7 changes: 7 additions & 0 deletions tests/testthat/_snaps/step-call.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
Output
setnames(copy(DT), c("a", "b", "c"), toupper)

# can compute distinct computed variables

Code
dt %>% distinct(z = x + y) %>% show_query()
Output
unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)])

# errors are raised

Code
Expand Down
5 changes: 2 additions & 3 deletions tests/testthat/test-step-call.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,8 @@ test_that("keeps all variables if requested", {
test_that("can compute distinct computed variables", {
dt <- lazy_dt(data.table(x = c(1, 1), y = c(1, 2)), "dt")

expect_equal(
dt %>% distinct(z = x + y) %>% show_query(),
expr(unique(dt[, .(z = x + y)]))
expect_snapshot(
dt %>% distinct(z = x + y) %>% show_query()
)

expect_equal(
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,6 @@ test_that("generates single calls as expect", {
dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(),
expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)])
)

expect_equal(
dt %>% transmute(x2 = x * 2) %>% show_query(),
expr(DT[, .(x2 = x * 2)])
)
})

test_that("mutate generates compound expression if needed", {
Expand Down
5 changes: 0 additions & 5 deletions tests/testthat/test-step-subset-summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@ test_that("simple calls generate expected translations", {
dt %>% summarise(x = mean(x)) %>% show_query(),
expr(DT[, .(x = mean(x))])
)

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
)
})

test_that("can use with across", {
Expand Down
247 changes: 8 additions & 239 deletions tests/testthat/test-step-subset-transmute.R
Original file line number Diff line number Diff line change
@@ -1,249 +1,18 @@
test_that("simple calls generate expected translations", {
test_that("works", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")

expect_equal(
dt %>% transmute(x) %>% show_query(),
expr(DT[, .(x = x)])
dt %>% transmute(x) %>% collect(),
dt %>% mutate(x, .keep = "none") %>% collect()
)
})

test_that("transmute generates compound expression if needed", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")
test_that("empty dots preserves groups", {
dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>%
group_by(y)

expect_equal(
dt %>% transmute(x2 = x * 2, x4 = x2 * 2) %>% show_query(),
expr(DT[, {
x2 <- x * 2
x4 <- x2 * 2
.(x2, x4)
}])
)
})

test_that("allows multiple assignment to the same variable", {
dt <- lazy_dt(data.table(x = 1, y = 2), "DT")

# when nested
expect_equal(
dt %>% transmute(x = x * 2, x = x * 2) %>% show_query(),
expr(DT[, {
x <- x * 2
x <- x * 2
.(x)
}])
)

# when not nested
expect_equal(
dt %>% transmute(z = 2, y = 3) %>% show_query(),
expr(DT[, .(z = 2, y = 3)])
)
})


test_that("groups are respected", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(y = 2)

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% show_query(),
expr(DT[, .(y = 2), keyby = .(x)])
)
})

test_that("grouping vars can be transmuted", {
dt <- lazy_dt(data.table(x = 1), "DT") %>% group_by(x) %>% transmute(x = x + 1, y = 2)

expect_equal(dt$vars, c("x", "y"))
expect_equal(dt$groups, "x")
expect_equal(
dt %>% show_query(),
expr(copy(DT)[, `:=`(x = x + 1)][, .(y = 2), keyby = .(x)])
)

skip("transmuting grouping vars with nesting is not supported")
dt <- lazy_dt(data.table(x = 1), "DT") %>%
group_by(x) %>%
transmute(x = x + 1, y = x + 1, x = y + 1)

expect_equal(dt$vars, c("x", "y"))
expect_equal(
dt %>% collect(),
tibble(x = 4, y = 3) %>% group_by(x)
)
})

test_that("empty transmute works", {
dt <- lazy_dt(data.frame(x = 1), "DT")
expect_equal(transmute(dt) %>% show_query(), expr(DT[, 0L]))
expect_equal(transmute(dt)$vars, character())
expect_equal(transmute(dt, !!!list()) %>% show_query(), expr(DT[, 0L]))

dt_grouped <- lazy_dt(data.frame(x = 1), "DT") %>% group_by(x)
expect_equal(transmute(dt_grouped)$vars, "x")
})

test_that("only transmuting groups works", {
dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x)
expect_equal(transmute(dt, x) %>% collect(), dt %>% collect())
expect_equal(transmute(dt, x)$vars, "x")
})

test_that("across() can access previously created variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, across(y, sqrt))
expect_equal(
collect(step),
tibble(y = sqrt(2))
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
y <- sqrt(y)
.(y)
}])
)
})

test_that("new columns take precedence over global variables", {
dt <- lazy_dt(data.frame(x = 1), "DT")
y <- 'global var'
step <- transmute(dt, y = 2, z = y + 1)
expect_equal(
collect(step),
tibble(y = 2, z = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y + 1
.(y, z)
}])
)
})

# var = NULL -------------------------------------------------------------
res <- dt %>% transmute() %>% collect()

test_that("var = NULL when var is in original data", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}][, `:=`("x", NULL)])
)
})

test_that("var = NULL when var is in final output", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = NULL, y = 3)
expect_equal(
collect(step),
tibble(y = 3)
)
expect_equal(
show_query(step),
expr(DT[, {
y <- NULL
y <- 3
.(y)
}])
)
})

test_that("temp var with nested arguments", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(z = 4)
)
expect_equal(
step$vars,
"z"
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}][, `:=`("y", NULL)])
)
})

test_that("temp var with no new vars added", {
dt <- lazy_dt(data.frame(x = 1), "DT")
step <- transmute(dt, y = 2, y = NULL)
expect_equal(
collect(step),
tibble()
)
expect_equal(
step$vars,
character()
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
.(y)
}][, `:=`("y", NULL)])
)
})

test_that("var = NULL works when data is grouped", {
dt <- lazy_dt(data.frame(x = 1, g = 1), "DT") %>% group_by(g)

# when var is in original data
step <- dt %>% transmute(x = 2, z = x*2, x = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
x <- 2
z <- x * 2
.(x, z)
}, keyby = .(g)][, `:=`("x", NULL)])
)

# when var is not in original data
step <- transmute(dt, y = 2, z = y*2, y = NULL)
expect_equal(
collect(step),
tibble(g = 1, z = 4) %>% group_by(g)
)
expect_equal(
step$vars,
c("g", "z")
)
expect_equal(
show_query(step),
expr(DT[, {
y <- 2
z <- y * 2
.(y, z)
}, keyby = .(g)][, `:=`("y", NULL)])
)
expect_equal(names(res), "y")
})

Loading