diff --git a/NEWS.md b/NEWS.md index e4aa2cf4..46a988c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ - new argument `model_matrix_attr` in `tidy_and_attach()` and `tidy_plus_plus()` to attach model frame and model matrix to the model as attributes for saving some execution time (#254) +- `tidy_add_n()` now returns `n_ind` the number of individuals, in addition to + the number of observations (#251) - by default, `tidy_parameters()` calls now `parameters::model_parameters()` with `pretty_names = FALSE` for saving execution time (#259) diff --git a/R/broom.helpers-package.R b/R/broom.helpers-package.R index cfe35c37..0f141d67 100644 --- a/R/broom.helpers-package.R +++ b/R/broom.helpers-package.R @@ -53,7 +53,7 @@ utils::globalVariables(c(".", "where")) "y.level", "component", "term", "original_term", "variable", "var_label", "var_class", "var_type", "var_nlevels", "header_row", "contrasts", "contrasts_type", - "reference_row", "label", "n_obs", "n_event", "exposure" + "reference_row", "label", "n_obs", "n_ind", "n_event", "exposure" ) ), dplyr::everything() @@ -67,7 +67,7 @@ utils::globalVariables(c(".", "where")) names(.attributes), c( "exponentiate", "conf.level", "coefficients_type", "coefficients_label", - "variable_labels", "term_labels", "N_obs", "N_event", "Exposure", + "variable_labels", "term_labels", "N_obs", "N_ind", "N_event", "Exposure", "force_contr.treatment", "skip_add_reference_rows", "find_missing_interaction_terms", "component" ) diff --git a/R/model_get_n.R b/R/model_get_n.R index 7c85a590..696c4170 100644 --- a/R/model_get_n.R +++ b/R/model_get_n.R @@ -6,16 +6,17 @@ #' For Poisson models, will return the number of events and exposure time #' (defined with [stats::offset()]). #' -#' For Cox models ([survival::coxph()]), will return the number of events and -#' exposure time. +#' For Cox models ([survival::coxph()]), will return the number of events, +#' exposure time and the number of individuals. #' #' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes #' into account only the event of interest defined by `failcode.` #' #' See [tidy_add_n()] for more details. #' -#' The total number of observations (`N_obs`), of events (`N_event`) and of -#' exposure time (`Exposure`) are stored as attributes of the returned tibble. +#' The total number of observations (`N_obs`), of individuals (`N_ind`), of +#' events (`N_event`) and of exposure time (`Exposure`) are stored as attributes +#' of the returned tibble. #' #' This function does not cover `lavaan` models (`NULL` is returned). #' @@ -193,6 +194,15 @@ model_get_n.coxph <- function(model) { ) attr(n, "N_obs") <- sum(w) + mf <- stats::model.frame(model) # using stats::model.frame() to get (id) + if (!"(id)" %in% names(mf)) + mf[["(id)"]] <- seq_len(nrow(mf)) + n_obs_per_ind <- mf %>% + dplyr::add_count(dplyr::pick("(id)")) |> + dplyr::pull("n") + n$n_ind <- colSums(tcm * w / n_obs_per_ind) + attr(n, "N_ind") <- sum(w / n_obs_per_ind) + y <- model %>% model_get_response() status <- y[, ncol(y)] if (ncol(y) == 3) { diff --git a/R/tidy_add_n.R b/R/tidy_add_n.R index a0313b44..f0005d73 100644 --- a/R/tidy_add_n.R +++ b/R/tidy_add_n.R @@ -40,10 +40,11 @@ #' obtained with `n_event / exposure`. #' #' For Cox models ([survival::coxph()]), an individual could be coded -#' with several observations (several rows). `n_obs` will correspond to the weighted -#' number of observations which could be different from the number of -#' individuals. `tidy_add_n()` will also compute a (weighted) number of events -#' (`n_event`) according to the definition of the [survival::Surv()] object. +#' with several observations (several rows). `n_obs` will correspond to the +#' weighted number of observations which could be different from the number of +#' individuals `n_ind`. `tidy_add_n()` will also compute a (weighted) number of +#' events (`n_event`) according to the definition of the [survival::Surv()] +#' object. #' Exposure time is also returned in `exposure` column. It is equal to the #' (weighted) sum of the time variable if only one variable time is passed to #' [survival::Surv()], and to the (weighted) sum of `time2 - time` if two time @@ -52,9 +53,9 @@ #' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes #' into account only the event of interest defined by `failcode.` #' -#' The (weighted) total number of observations (`N_obs`), of events (`N_event`) and -#' of exposure time (`Exposure`) are stored as attributes of the returned -#' tibble. +#' The (weighted) total number of observations (`N_obs`), of individuals +#' (`N_ind`), of events (`N_event`) and of exposure time (`Exposure`) are +#' stored as attributes of the returned tibble. #' #' @param x a tidy tibble #' @param model the corresponding model, if not attached to `x` @@ -140,6 +141,9 @@ tidy_add_n <- function(x, model = tidy_get_model(x)) { if (!is.null(attr(n, "N_obs"))) { .attributes$N_obs <- attr(n, "N_obs") } + if (!is.null(attr(n, "N_ind"))) { + .attributes$N_ind <- attr(n, "N_ind") + } if (!is.null(attr(n, "N_event"))) { .attributes$N_event <- attr(n, "N_event") } diff --git a/man/model_get_n.Rd b/man/model_get_n.Rd index 5848a171..927a51be 100644 --- a/man/model_get_n.Rd +++ b/man/model_get_n.Rd @@ -44,16 +44,17 @@ the number of events. For Poisson models, will return the number of events and exposure time (defined with \code{\link[stats:offset]{stats::offset()}}). -For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), will return the number of events and -exposure time. +For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), will return the number of events, +exposure time and the number of individuals. For competing risk regression models (\code{\link[tidycmprsk:crr]{tidycmprsk::crr()}}), \code{n_event} takes into account only the event of interest defined by \code{failcode.} See \code{\link[=tidy_add_n]{tidy_add_n()}} for more details. -The total number of observations (\code{N_obs}), of events (\code{N_event}) and of -exposure time (\code{Exposure}) are stored as attributes of the returned tibble. +The total number of observations (\code{N_obs}), of individuals (\code{N_ind}), of +events (\code{N_event}) and of exposure time (\code{Exposure}) are stored as attributes +of the returned tibble. This function does not cover \code{lavaan} models (\code{NULL} is returned). } diff --git a/man/tidy_add_n.Rd b/man/tidy_add_n.Rd index dc0d3651..04410269 100644 --- a/man/tidy_add_n.Rd +++ b/man/tidy_add_n.Rd @@ -53,10 +53,11 @@ as \code{glm(y ~ x + offset(log(z)), family = poisson)}). Observed rates could b obtained with \code{n_event / exposure}. For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), an individual could be coded -with several observations (several rows). \code{n_obs} will correspond to the weighted -number of observations which could be different from the number of -individuals. \code{tidy_add_n()} will also compute a (weighted) number of events -(\code{n_event}) according to the definition of the \code{\link[survival:Surv]{survival::Surv()}} object. +with several observations (several rows). \code{n_obs} will correspond to the +weighted number of observations which could be different from the number of +individuals \code{n_ind}. \code{tidy_add_n()} will also compute a (weighted) number of +events (\code{n_event}) according to the definition of the \code{\link[survival:Surv]{survival::Surv()}} +object. Exposure time is also returned in \code{exposure} column. It is equal to the (weighted) sum of the time variable if only one variable time is passed to \code{\link[survival:Surv]{survival::Surv()}}, and to the (weighted) sum of \code{time2 - time} if two time @@ -65,9 +66,9 @@ variables are defined in \code{\link[survival:Surv]{survival::Surv()}}. For competing risk regression models (\code{\link[tidycmprsk:crr]{tidycmprsk::crr()}}), \code{n_event} takes into account only the event of interest defined by \code{failcode.} -The (weighted) total number of observations (\code{N_obs}), of events (\code{N_event}) and -of exposure time (\code{Exposure}) are stored as attributes of the returned -tibble. +The (weighted) total number of observations (\code{N_obs}), of individuals +(\code{N_ind}), of events (\code{N_event}) and of exposure time (\code{Exposure}) are +stored as attributes of the returned tibble. } \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/tests/testthat/test-add_n.R b/tests/testthat/test-add_n.R index 9b485f3e..bad04254 100644 --- a/tests/testthat/test-add_n.R +++ b/tests/testthat/test-add_n.R @@ -174,7 +174,9 @@ test_that("tidy_add_n() works with survival::coxph", { skip_on_cran() df <- survival::lung %>% dplyr::mutate(sex = factor(sex)) mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df) - expect_error(mod %>% tidy_and_attach() %>% tidy_add_n(), NA) + expect_error(res <- mod %>% tidy_and_attach() %>% tidy_add_n(), NA) + expect_equivalent(res$n_ind, c(227, 227, 90)) + expect_equivalent(attr(res, "N_ind"), 227) }) test_that("tidy_add_n() works with survival::survreg", { diff --git a/tests/testthat/test-model_get_n.R b/tests/testthat/test-model_get_n.R index b6d6492d..76e758cd 100644 --- a/tests/testthat/test-model_get_n.R +++ b/tests/testthat/test-model_get_n.R @@ -240,7 +240,10 @@ test_that("model_get_n() works with survival::coxph", { df <- survival::lung %>% dplyr::mutate(sex = factor(sex)) mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df) expect_error(res <- mod %>% model_get_n(), NA) - expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure")) + expect_equivalent( + names(res), + c("term", "n_obs", "n_ind", "n_event", "exposure") + ) test <- list( start = c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), @@ -250,8 +253,12 @@ test_that("model_get_n() works with survival::coxph", { ) mod <- survival::coxph(survival::Surv(start, stop, event) ~ x, test) expect_error(res <- mod %>% model_get_n(), NA) - expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure")) + expect_equivalent( + names(res), + c("term", "n_obs", "n_ind", "n_event", "exposure") + ) expect_equivalent(res$n_obs, c(10, 10)) + expect_equivalent(res$n_ind, c(10, 10)) expect_equivalent(res$n_event, c(7, 7)) expect_equivalent(res$exposure, c(43, 43)) }) @@ -264,7 +271,10 @@ test_that("model_get_n() works with survival::survreg", { dist = "exponential" ) expect_error(res <- mod %>% model_get_n(), NA) - expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure")) + expect_equivalent( + names(res), + c("term", "n_obs", "n_ind", "n_event", "exposure") + ) }) test_that("model_get_n() works with nnet::multinom", { @@ -401,7 +411,10 @@ test_that("model_get_n() works with tidycmprsk::crr", { skip_on_cran() skip_if_not_installed("tidycmprsk") - mod <- tidycmprsk::crr(Surv(ttdeath, death_cr) ~ age + grade, tidycmprsk::trial) + mod <- tidycmprsk::crr( + survival::Surv(ttdeath, death_cr) ~ age + grade, + tidycmprsk::trial + ) res <- mod %>% tidy_plus_plus() expect_equivalent( res$n_event, diff --git a/vignettes/tidy.Rmd b/vignettes/tidy.Rmd index df2a5884..3d41ae96 100644 --- a/vignettes/tidy.Rmd +++ b/vignettes/tidy.Rmd @@ -308,6 +308,7 @@ tibble::tribble( "label", "`tidy_add_term_labels()`", "String of term labels based on (1) labels provided in `labels` argument if provided; (2) factor levels for categorical variables coded with treatment, SAS or sum contrasts; (3) variable labels when there is only one term per variable; and (4) term name otherwise.
Require \"variable_label\" column. If needed, will automatically apply `tidy_add_variable_labels()`.
Require \"contrasts\" column. If needed, will automatically apply `tidy_add_contrasts()`.
", "header_row", "`tidy_add_header_rows()`", "Logical indicating if a row is a header row for variables with several terms. Is equal to `NA` for variables who do not have an header row.
Require \"label\" column. If needed, will automatically apply `tidy_add_term_labels()`.
It is better to apply `tidy_add_header_rows()` after other `tidy_*` functions
", "n_obs", "`tidy_add_n()`", "Number of observations", + "n_ind", "`tidy_add_n()`", "Number of individuals (for Cox models)", "n_event", "`tidy_add_n()`", "Number of events (for binomial and multinomial logistic models, Poisson and Cox models)", "exposure", "`tidy_add_n()`", "Exposure time (for Poisson and Cox models)" ) %>% @@ -346,6 +347,7 @@ tibble::tribble( "Custom term labels passed to `tidy_add_term_labels()`", "N_obs", "`tidy_add_n()`", "Total number of observations", "N_event", "`tidy_add_n()`", "Total number of events", + "N_ind", "`tidy_add_n()`", "Total number of individuals (for Cox models)", "Exposure", "`tidy_add_n()`", "Total of exposure time", "component", "`tidy_zeroinfl()`", "`component` argument passed to `tidy_zeroinfl()`" ) %>%