From 871e521c0cab269b453139e79a507433c5333a6c Mon Sep 17 00:00:00 2001 From: Joseph Larmarange Date: Thu, 11 Apr 2024 14:33:47 +0200 Subject: [PATCH 1/5] datagridcf is deprecated fix #250 --- R/marginal_tidiers.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/marginal_tidiers.R b/R/marginal_tidiers.R index 4017d6b0..2e35efad 100644 --- a/R/marginal_tidiers.R +++ b/R/marginal_tidiers.R @@ -1029,7 +1029,8 @@ tidy_marginal_contrasts <- function(x, variables_list = "auto", if (!is.null(variables$by) && is.null(dots$newdata)) { args <- variables$by args$model <- dots$model - dots$newdata <- do.call(marginaleffects::datagridcf, args) + args$grid_type <- "counterfactual" + dots$newdata <- do.call(marginaleffects::datagrid, args) } if (!is.null(variables$by) && identical(dots$newdata, "mean")) { From 5310b907bac75904f7817ba05ac60ebf8241b287 Mon Sep 17 00:00:00 2001 From: Joseph Larmarange Date: Thu, 11 Apr 2024 15:53:51 +0200 Subject: [PATCH 2/5] compute the number of individuals for coxph fix #249 --- R/broom.helpers-package.R | 4 ++-- R/model_get_n.R | 18 ++++++++++++++---- R/tidy_add_n.R | 9 +++++---- man/model_get_n.Rd | 9 +++++---- man/tidy_add_n.Rd | 9 +++++---- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/R/broom.helpers-package.R b/R/broom.helpers-package.R index cfe35c37..0f141d67 100644 --- a/R/broom.helpers-package.R +++ b/R/broom.helpers-package.R @@ -53,7 +53,7 @@ utils::globalVariables(c(".", "where")) "y.level", "component", "term", "original_term", "variable", "var_label", "var_class", "var_type", "var_nlevels", "header_row", "contrasts", "contrasts_type", - "reference_row", "label", "n_obs", "n_event", "exposure" + "reference_row", "label", "n_obs", "n_ind", "n_event", "exposure" ) ), dplyr::everything() @@ -67,7 +67,7 @@ utils::globalVariables(c(".", "where")) names(.attributes), c( "exponentiate", "conf.level", "coefficients_type", "coefficients_label", - "variable_labels", "term_labels", "N_obs", "N_event", "Exposure", + "variable_labels", "term_labels", "N_obs", "N_ind", "N_event", "Exposure", "force_contr.treatment", "skip_add_reference_rows", "find_missing_interaction_terms", "component" ) diff --git a/R/model_get_n.R b/R/model_get_n.R index 7c85a590..696c4170 100644 --- a/R/model_get_n.R +++ b/R/model_get_n.R @@ -6,16 +6,17 @@ #' For Poisson models, will return the number of events and exposure time #' (defined with [stats::offset()]). #' -#' For Cox models ([survival::coxph()]), will return the number of events and -#' exposure time. +#' For Cox models ([survival::coxph()]), will return the number of events, +#' exposure time and the number of individuals. #' #' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes #' into account only the event of interest defined by `failcode.` #' #' See [tidy_add_n()] for more details. #' -#' The total number of observations (`N_obs`), of events (`N_event`) and of -#' exposure time (`Exposure`) are stored as attributes of the returned tibble. +#' The total number of observations (`N_obs`), of individuals (`N_ind`), of +#' events (`N_event`) and of exposure time (`Exposure`) are stored as attributes +#' of the returned tibble. #' #' This function does not cover `lavaan` models (`NULL` is returned). #' @@ -193,6 +194,15 @@ model_get_n.coxph <- function(model) { ) attr(n, "N_obs") <- sum(w) + mf <- stats::model.frame(model) # using stats::model.frame() to get (id) + if (!"(id)" %in% names(mf)) + mf[["(id)"]] <- seq_len(nrow(mf)) + n_obs_per_ind <- mf %>% + dplyr::add_count(dplyr::pick("(id)")) |> + dplyr::pull("n") + n$n_ind <- colSums(tcm * w / n_obs_per_ind) + attr(n, "N_ind") <- sum(w / n_obs_per_ind) + y <- model %>% model_get_response() status <- y[, ncol(y)] if (ncol(y) == 3) { diff --git a/R/tidy_add_n.R b/R/tidy_add_n.R index a0313b44..cef18811 100644 --- a/R/tidy_add_n.R +++ b/R/tidy_add_n.R @@ -40,10 +40,11 @@ #' obtained with `n_event / exposure`. #' #' For Cox models ([survival::coxph()]), an individual could be coded -#' with several observations (several rows). `n_obs` will correspond to the weighted -#' number of observations which could be different from the number of -#' individuals. `tidy_add_n()` will also compute a (weighted) number of events -#' (`n_event`) according to the definition of the [survival::Surv()] object. +#' with several observations (several rows). `n_obs` will correspond to the +#' weighted number of observations which could be different from the number of +#' individuals `n_ind`. `tidy_add_n()` will also compute a (weighted) number of +#' events (`n_event`) according to the definition of the [survival::Surv()] +#' object. #' Exposure time is also returned in `exposure` column. It is equal to the #' (weighted) sum of the time variable if only one variable time is passed to #' [survival::Surv()], and to the (weighted) sum of `time2 - time` if two time diff --git a/man/model_get_n.Rd b/man/model_get_n.Rd index 5848a171..927a51be 100644 --- a/man/model_get_n.Rd +++ b/man/model_get_n.Rd @@ -44,16 +44,17 @@ the number of events. For Poisson models, will return the number of events and exposure time (defined with \code{\link[stats:offset]{stats::offset()}}). -For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), will return the number of events and -exposure time. +For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), will return the number of events, +exposure time and the number of individuals. For competing risk regression models (\code{\link[tidycmprsk:crr]{tidycmprsk::crr()}}), \code{n_event} takes into account only the event of interest defined by \code{failcode.} See \code{\link[=tidy_add_n]{tidy_add_n()}} for more details. -The total number of observations (\code{N_obs}), of events (\code{N_event}) and of -exposure time (\code{Exposure}) are stored as attributes of the returned tibble. +The total number of observations (\code{N_obs}), of individuals (\code{N_ind}), of +events (\code{N_event}) and of exposure time (\code{Exposure}) are stored as attributes +of the returned tibble. This function does not cover \code{lavaan} models (\code{NULL} is returned). } diff --git a/man/tidy_add_n.Rd b/man/tidy_add_n.Rd index dc0d3651..a2dcaa5e 100644 --- a/man/tidy_add_n.Rd +++ b/man/tidy_add_n.Rd @@ -53,10 +53,11 @@ as \code{glm(y ~ x + offset(log(z)), family = poisson)}). Observed rates could b obtained with \code{n_event / exposure}. For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), an individual could be coded -with several observations (several rows). \code{n_obs} will correspond to the weighted -number of observations which could be different from the number of -individuals. \code{tidy_add_n()} will also compute a (weighted) number of events -(\code{n_event}) according to the definition of the \code{\link[survival:Surv]{survival::Surv()}} object. +with several observations (several rows). \code{n_obs} will correspond to the +weighted number of observations which could be different from the number of +individuals \code{n_ind}. \code{tidy_add_n()} will also compute a (weighted) number of +events (\code{n_event}) according to the definition of the \code{\link[survival:Surv]{survival::Surv()}} +object. Exposure time is also returned in \code{exposure} column. It is equal to the (weighted) sum of the time variable if only one variable time is passed to \code{\link[survival:Surv]{survival::Surv()}}, and to the (weighted) sum of \code{time2 - time} if two time From f98313db28b751d05118a5115fcb75e13b45ace8 Mon Sep 17 00:00:00 2001 From: Joseph Larmarange Date: Sat, 27 Jul 2024 13:55:25 +0200 Subject: [PATCH 3/5] NEWS and doc --- NEWS.md | 2 ++ vignettes/tidy.Rmd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index e142cad8..53bc49fd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ - new argument `model_matrix_attr` in `tidy_and_attach()` and `tidy_plus_plus()` to attach model frame and model matrix to the model as attributes for saving some execution time (#254) +- `tidy_add_n()` now returns `n_ind` the number of individuals, in addition to + the number of observations (#251) **Deprecated support** diff --git a/vignettes/tidy.Rmd b/vignettes/tidy.Rmd index df2a5884..3d41ae96 100644 --- a/vignettes/tidy.Rmd +++ b/vignettes/tidy.Rmd @@ -308,6 +308,7 @@ tibble::tribble( "label", "`tidy_add_term_labels()`", "String of term labels based on (1) labels provided in `labels` argument if provided; (2) factor levels for categorical variables coded with treatment, SAS or sum contrasts; (3) variable labels when there is only one term per variable; and (4) term name otherwise.
Require \"variable_label\" column. If needed, will automatically apply `tidy_add_variable_labels()`.
Require \"contrasts\" column. If needed, will automatically apply `tidy_add_contrasts()`.
", "header_row", "`tidy_add_header_rows()`", "Logical indicating if a row is a header row for variables with several terms. Is equal to `NA` for variables who do not have an header row.
Require \"label\" column. If needed, will automatically apply `tidy_add_term_labels()`.
It is better to apply `tidy_add_header_rows()` after other `tidy_*` functions
", "n_obs", "`tidy_add_n()`", "Number of observations", + "n_ind", "`tidy_add_n()`", "Number of individuals (for Cox models)", "n_event", "`tidy_add_n()`", "Number of events (for binomial and multinomial logistic models, Poisson and Cox models)", "exposure", "`tidy_add_n()`", "Exposure time (for Poisson and Cox models)" ) %>% @@ -346,6 +347,7 @@ tibble::tribble( "Custom term labels passed to `tidy_add_term_labels()`", "N_obs", "`tidy_add_n()`", "Total number of observations", "N_event", "`tidy_add_n()`", "Total number of events", + "N_ind", "`tidy_add_n()`", "Total number of individuals (for Cox models)", "Exposure", "`tidy_add_n()`", "Total of exposure time", "component", "`tidy_zeroinfl()`", "`component` argument passed to `tidy_zeroinfl()`" ) %>% From 649dadec4cc716d5182df4aa0f5f08019f294ab1 Mon Sep 17 00:00:00 2001 From: Joseph Larmarange Date: Sat, 27 Jul 2024 14:01:31 +0200 Subject: [PATCH 4/5] pass N_ind and tests --- R/tidy_add_n.R | 9 ++++++--- man/tidy_add_n.Rd | 6 +++--- tests/testthat/test-add_n.R | 4 +++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/R/tidy_add_n.R b/R/tidy_add_n.R index cef18811..f0005d73 100644 --- a/R/tidy_add_n.R +++ b/R/tidy_add_n.R @@ -53,9 +53,9 @@ #' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes #' into account only the event of interest defined by `failcode.` #' -#' The (weighted) total number of observations (`N_obs`), of events (`N_event`) and -#' of exposure time (`Exposure`) are stored as attributes of the returned -#' tibble. +#' The (weighted) total number of observations (`N_obs`), of individuals +#' (`N_ind`), of events (`N_event`) and of exposure time (`Exposure`) are +#' stored as attributes of the returned tibble. #' #' @param x a tidy tibble #' @param model the corresponding model, if not attached to `x` @@ -141,6 +141,9 @@ tidy_add_n <- function(x, model = tidy_get_model(x)) { if (!is.null(attr(n, "N_obs"))) { .attributes$N_obs <- attr(n, "N_obs") } + if (!is.null(attr(n, "N_ind"))) { + .attributes$N_ind <- attr(n, "N_ind") + } if (!is.null(attr(n, "N_event"))) { .attributes$N_event <- attr(n, "N_event") } diff --git a/man/tidy_add_n.Rd b/man/tidy_add_n.Rd index a2dcaa5e..04410269 100644 --- a/man/tidy_add_n.Rd +++ b/man/tidy_add_n.Rd @@ -66,9 +66,9 @@ variables are defined in \code{\link[survival:Surv]{survival::Surv()}}. For competing risk regression models (\code{\link[tidycmprsk:crr]{tidycmprsk::crr()}}), \code{n_event} takes into account only the event of interest defined by \code{failcode.} -The (weighted) total number of observations (\code{N_obs}), of events (\code{N_event}) and -of exposure time (\code{Exposure}) are stored as attributes of the returned -tibble. +The (weighted) total number of observations (\code{N_obs}), of individuals +(\code{N_ind}), of events (\code{N_event}) and of exposure time (\code{Exposure}) are +stored as attributes of the returned tibble. } \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/tests/testthat/test-add_n.R b/tests/testthat/test-add_n.R index 9b485f3e..bad04254 100644 --- a/tests/testthat/test-add_n.R +++ b/tests/testthat/test-add_n.R @@ -174,7 +174,9 @@ test_that("tidy_add_n() works with survival::coxph", { skip_on_cran() df <- survival::lung %>% dplyr::mutate(sex = factor(sex)) mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df) - expect_error(mod %>% tidy_and_attach() %>% tidy_add_n(), NA) + expect_error(res <- mod %>% tidy_and_attach() %>% tidy_add_n(), NA) + expect_equivalent(res$n_ind, c(227, 227, 90)) + expect_equivalent(attr(res, "N_ind"), 227) }) test_that("tidy_add_n() works with survival::survreg", { From 111b3478cd3af980f57f902ed5c8c21a94ce63c8 Mon Sep 17 00:00:00 2001 From: Joseph Larmarange Date: Sat, 27 Jul 2024 14:15:26 +0200 Subject: [PATCH 5/5] additional tests --- tests/testthat/test-model_get_n.R | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-model_get_n.R b/tests/testthat/test-model_get_n.R index b6d6492d..76e758cd 100644 --- a/tests/testthat/test-model_get_n.R +++ b/tests/testthat/test-model_get_n.R @@ -240,7 +240,10 @@ test_that("model_get_n() works with survival::coxph", { df <- survival::lung %>% dplyr::mutate(sex = factor(sex)) mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df) expect_error(res <- mod %>% model_get_n(), NA) - expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure")) + expect_equivalent( + names(res), + c("term", "n_obs", "n_ind", "n_event", "exposure") + ) test <- list( start = c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), @@ -250,8 +253,12 @@ test_that("model_get_n() works with survival::coxph", { ) mod <- survival::coxph(survival::Surv(start, stop, event) ~ x, test) expect_error(res <- mod %>% model_get_n(), NA) - expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure")) + expect_equivalent( + names(res), + c("term", "n_obs", "n_ind", "n_event", "exposure") + ) expect_equivalent(res$n_obs, c(10, 10)) + expect_equivalent(res$n_ind, c(10, 10)) expect_equivalent(res$n_event, c(7, 7)) expect_equivalent(res$exposure, c(43, 43)) }) @@ -264,7 +271,10 @@ test_that("model_get_n() works with survival::survreg", { dist = "exponential" ) expect_error(res <- mod %>% model_get_n(), NA) - expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure")) + expect_equivalent( + names(res), + c("term", "n_obs", "n_ind", "n_event", "exposure") + ) }) test_that("model_get_n() works with nnet::multinom", { @@ -401,7 +411,10 @@ test_that("model_get_n() works with tidycmprsk::crr", { skip_on_cran() skip_if_not_installed("tidycmprsk") - mod <- tidycmprsk::crr(Surv(ttdeath, death_cr) ~ age + grade, tidycmprsk::trial) + mod <- tidycmprsk::crr( + survival::Surv(ttdeath, death_cr) ~ age + grade, + tidycmprsk::trial + ) res <- mod %>% tidy_plus_plus() expect_equivalent( res$n_event,