diff --git a/NEWS.md b/NEWS.md
index e4aa2cf4..46a988c5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,8 @@
- new argument `model_matrix_attr` in `tidy_and_attach()` and `tidy_plus_plus()`
to attach model frame and model matrix to the model as attributes for saving
some execution time (#254)
+- `tidy_add_n()` now returns `n_ind` the number of individuals, in addition to
+ the number of observations (#251)
- by default, `tidy_parameters()` calls now `parameters::model_parameters()`
with `pretty_names = FALSE` for saving execution time (#259)
diff --git a/R/broom.helpers-package.R b/R/broom.helpers-package.R
index cfe35c37..0f141d67 100644
--- a/R/broom.helpers-package.R
+++ b/R/broom.helpers-package.R
@@ -53,7 +53,7 @@ utils::globalVariables(c(".", "where"))
"y.level", "component", "term", "original_term", "variable",
"var_label", "var_class", "var_type",
"var_nlevels", "header_row", "contrasts", "contrasts_type",
- "reference_row", "label", "n_obs", "n_event", "exposure"
+ "reference_row", "label", "n_obs", "n_ind", "n_event", "exposure"
)
),
dplyr::everything()
@@ -67,7 +67,7 @@ utils::globalVariables(c(".", "where"))
names(.attributes),
c(
"exponentiate", "conf.level", "coefficients_type", "coefficients_label",
- "variable_labels", "term_labels", "N_obs", "N_event", "Exposure",
+ "variable_labels", "term_labels", "N_obs", "N_ind", "N_event", "Exposure",
"force_contr.treatment", "skip_add_reference_rows",
"find_missing_interaction_terms", "component"
)
diff --git a/R/model_get_n.R b/R/model_get_n.R
index 7c85a590..696c4170 100644
--- a/R/model_get_n.R
+++ b/R/model_get_n.R
@@ -6,16 +6,17 @@
#' For Poisson models, will return the number of events and exposure time
#' (defined with [stats::offset()]).
#'
-#' For Cox models ([survival::coxph()]), will return the number of events and
-#' exposure time.
+#' For Cox models ([survival::coxph()]), will return the number of events,
+#' exposure time and the number of individuals.
#'
#' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes
#' into account only the event of interest defined by `failcode.`
#'
#' See [tidy_add_n()] for more details.
#'
-#' The total number of observations (`N_obs`), of events (`N_event`) and of
-#' exposure time (`Exposure`) are stored as attributes of the returned tibble.
+#' The total number of observations (`N_obs`), of individuals (`N_ind`), of
+#' events (`N_event`) and of exposure time (`Exposure`) are stored as attributes
+#' of the returned tibble.
#'
#' This function does not cover `lavaan` models (`NULL` is returned).
#'
@@ -193,6 +194,15 @@ model_get_n.coxph <- function(model) {
)
attr(n, "N_obs") <- sum(w)
+ mf <- stats::model.frame(model) # using stats::model.frame() to get (id)
+ if (!"(id)" %in% names(mf))
+ mf[["(id)"]] <- seq_len(nrow(mf))
+ n_obs_per_ind <- mf %>%
+ dplyr::add_count(dplyr::pick("(id)")) |>
+ dplyr::pull("n")
+ n$n_ind <- colSums(tcm * w / n_obs_per_ind)
+ attr(n, "N_ind") <- sum(w / n_obs_per_ind)
+
y <- model %>% model_get_response()
status <- y[, ncol(y)]
if (ncol(y) == 3) {
diff --git a/R/tidy_add_n.R b/R/tidy_add_n.R
index a0313b44..f0005d73 100644
--- a/R/tidy_add_n.R
+++ b/R/tidy_add_n.R
@@ -40,10 +40,11 @@
#' obtained with `n_event / exposure`.
#'
#' For Cox models ([survival::coxph()]), an individual could be coded
-#' with several observations (several rows). `n_obs` will correspond to the weighted
-#' number of observations which could be different from the number of
-#' individuals. `tidy_add_n()` will also compute a (weighted) number of events
-#' (`n_event`) according to the definition of the [survival::Surv()] object.
+#' with several observations (several rows). `n_obs` will correspond to the
+#' weighted number of observations which could be different from the number of
+#' individuals `n_ind`. `tidy_add_n()` will also compute a (weighted) number of
+#' events (`n_event`) according to the definition of the [survival::Surv()]
+#' object.
#' Exposure time is also returned in `exposure` column. It is equal to the
#' (weighted) sum of the time variable if only one variable time is passed to
#' [survival::Surv()], and to the (weighted) sum of `time2 - time` if two time
@@ -52,9 +53,9 @@
#' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes
#' into account only the event of interest defined by `failcode.`
#'
-#' The (weighted) total number of observations (`N_obs`), of events (`N_event`) and
-#' of exposure time (`Exposure`) are stored as attributes of the returned
-#' tibble.
+#' The (weighted) total number of observations (`N_obs`), of individuals
+#' (`N_ind`), of events (`N_event`) and of exposure time (`Exposure`) are
+#' stored as attributes of the returned tibble.
#'
#' @param x a tidy tibble
#' @param model the corresponding model, if not attached to `x`
@@ -140,6 +141,9 @@ tidy_add_n <- function(x, model = tidy_get_model(x)) {
if (!is.null(attr(n, "N_obs"))) {
.attributes$N_obs <- attr(n, "N_obs")
}
+ if (!is.null(attr(n, "N_ind"))) {
+ .attributes$N_ind <- attr(n, "N_ind")
+ }
if (!is.null(attr(n, "N_event"))) {
.attributes$N_event <- attr(n, "N_event")
}
diff --git a/man/model_get_n.Rd b/man/model_get_n.Rd
index 5848a171..927a51be 100644
--- a/man/model_get_n.Rd
+++ b/man/model_get_n.Rd
@@ -44,16 +44,17 @@ the number of events.
For Poisson models, will return the number of events and exposure time
(defined with \code{\link[stats:offset]{stats::offset()}}).
-For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), will return the number of events and
-exposure time.
+For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), will return the number of events,
+exposure time and the number of individuals.
For competing risk regression models (\code{\link[tidycmprsk:crr]{tidycmprsk::crr()}}), \code{n_event} takes
into account only the event of interest defined by \code{failcode.}
See \code{\link[=tidy_add_n]{tidy_add_n()}} for more details.
-The total number of observations (\code{N_obs}), of events (\code{N_event}) and of
-exposure time (\code{Exposure}) are stored as attributes of the returned tibble.
+The total number of observations (\code{N_obs}), of individuals (\code{N_ind}), of
+events (\code{N_event}) and of exposure time (\code{Exposure}) are stored as attributes
+of the returned tibble.
This function does not cover \code{lavaan} models (\code{NULL} is returned).
}
diff --git a/man/tidy_add_n.Rd b/man/tidy_add_n.Rd
index dc0d3651..04410269 100644
--- a/man/tidy_add_n.Rd
+++ b/man/tidy_add_n.Rd
@@ -53,10 +53,11 @@ as \code{glm(y ~ x + offset(log(z)), family = poisson)}). Observed rates could b
obtained with \code{n_event / exposure}.
For Cox models (\code{\link[survival:coxph]{survival::coxph()}}), an individual could be coded
-with several observations (several rows). \code{n_obs} will correspond to the weighted
-number of observations which could be different from the number of
-individuals. \code{tidy_add_n()} will also compute a (weighted) number of events
-(\code{n_event}) according to the definition of the \code{\link[survival:Surv]{survival::Surv()}} object.
+with several observations (several rows). \code{n_obs} will correspond to the
+weighted number of observations which could be different from the number of
+individuals \code{n_ind}. \code{tidy_add_n()} will also compute a (weighted) number of
+events (\code{n_event}) according to the definition of the \code{\link[survival:Surv]{survival::Surv()}}
+object.
Exposure time is also returned in \code{exposure} column. It is equal to the
(weighted) sum of the time variable if only one variable time is passed to
\code{\link[survival:Surv]{survival::Surv()}}, and to the (weighted) sum of \code{time2 - time} if two time
@@ -65,9 +66,9 @@ variables are defined in \code{\link[survival:Surv]{survival::Surv()}}.
For competing risk regression models (\code{\link[tidycmprsk:crr]{tidycmprsk::crr()}}), \code{n_event} takes
into account only the event of interest defined by \code{failcode.}
-The (weighted) total number of observations (\code{N_obs}), of events (\code{N_event}) and
-of exposure time (\code{Exposure}) are stored as attributes of the returned
-tibble.
+The (weighted) total number of observations (\code{N_obs}), of individuals
+(\code{N_ind}), of events (\code{N_event}) and of exposure time (\code{Exposure}) are
+stored as attributes of the returned tibble.
}
\examples{
\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
diff --git a/tests/testthat/test-add_n.R b/tests/testthat/test-add_n.R
index 9b485f3e..bad04254 100644
--- a/tests/testthat/test-add_n.R
+++ b/tests/testthat/test-add_n.R
@@ -174,7 +174,9 @@ test_that("tidy_add_n() works with survival::coxph", {
skip_on_cran()
df <- survival::lung %>% dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
- expect_error(mod %>% tidy_and_attach() %>% tidy_add_n(), NA)
+ expect_error(res <- mod %>% tidy_and_attach() %>% tidy_add_n(), NA)
+ expect_equivalent(res$n_ind, c(227, 227, 90))
+ expect_equivalent(attr(res, "N_ind"), 227)
})
test_that("tidy_add_n() works with survival::survreg", {
diff --git a/tests/testthat/test-model_get_n.R b/tests/testthat/test-model_get_n.R
index b6d6492d..76e758cd 100644
--- a/tests/testthat/test-model_get_n.R
+++ b/tests/testthat/test-model_get_n.R
@@ -240,7 +240,10 @@ test_that("model_get_n() works with survival::coxph", {
df <- survival::lung %>% dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
expect_error(res <- mod %>% model_get_n(), NA)
- expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure"))
+ expect_equivalent(
+ names(res),
+ c("term", "n_obs", "n_ind", "n_event", "exposure")
+ )
test <- list(
start = c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8),
@@ -250,8 +253,12 @@ test_that("model_get_n() works with survival::coxph", {
)
mod <- survival::coxph(survival::Surv(start, stop, event) ~ x, test)
expect_error(res <- mod %>% model_get_n(), NA)
- expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure"))
+ expect_equivalent(
+ names(res),
+ c("term", "n_obs", "n_ind", "n_event", "exposure")
+ )
expect_equivalent(res$n_obs, c(10, 10))
+ expect_equivalent(res$n_ind, c(10, 10))
expect_equivalent(res$n_event, c(7, 7))
expect_equivalent(res$exposure, c(43, 43))
})
@@ -264,7 +271,10 @@ test_that("model_get_n() works with survival::survreg", {
dist = "exponential"
)
expect_error(res <- mod %>% model_get_n(), NA)
- expect_equivalent(names(res), c("term", "n_obs", "n_event", "exposure"))
+ expect_equivalent(
+ names(res),
+ c("term", "n_obs", "n_ind", "n_event", "exposure")
+ )
})
test_that("model_get_n() works with nnet::multinom", {
@@ -401,7 +411,10 @@ test_that("model_get_n() works with tidycmprsk::crr", {
skip_on_cran()
skip_if_not_installed("tidycmprsk")
- mod <- tidycmprsk::crr(Surv(ttdeath, death_cr) ~ age + grade, tidycmprsk::trial)
+ mod <- tidycmprsk::crr(
+ survival::Surv(ttdeath, death_cr) ~ age + grade,
+ tidycmprsk::trial
+ )
res <- mod %>% tidy_plus_plus()
expect_equivalent(
res$n_event,
diff --git a/vignettes/tidy.Rmd b/vignettes/tidy.Rmd
index df2a5884..3d41ae96 100644
--- a/vignettes/tidy.Rmd
+++ b/vignettes/tidy.Rmd
@@ -308,6 +308,7 @@ tibble::tribble(
"label", "`tidy_add_term_labels()`", "String of term labels based on (1) labels provided in `labels` argument if provided; (2) factor levels for categorical variables coded with treatment, SAS or sum contrasts; (3) variable labels when there is only one term per variable; and (4) term name otherwise.
Require \"variable_label\" column. If needed, will automatically apply `tidy_add_variable_labels()`.
Require \"contrasts\" column. If needed, will automatically apply `tidy_add_contrasts()`.",
"header_row", "`tidy_add_header_rows()`", "Logical indicating if a row is a header row for variables with several terms. Is equal to `NA` for variables who do not have an header row.Require \"label\" column. If needed, will automatically apply `tidy_add_term_labels()`.
It is better to apply `tidy_add_header_rows()` after other `tidy_*` functions",
"n_obs", "`tidy_add_n()`", "Number of observations",
+ "n_ind", "`tidy_add_n()`", "Number of individuals (for Cox models)",
"n_event", "`tidy_add_n()`", "Number of events (for binomial and multinomial logistic models, Poisson and Cox models)",
"exposure", "`tidy_add_n()`", "Exposure time (for Poisson and Cox models)"
) %>%
@@ -346,6 +347,7 @@ tibble::tribble(
"Custom term labels passed to `tidy_add_term_labels()`",
"N_obs", "`tidy_add_n()`", "Total number of observations",
"N_event", "`tidy_add_n()`", "Total number of events",
+ "N_ind", "`tidy_add_n()`", "Total number of individuals (for Cox models)",
"Exposure", "`tidy_add_n()`", "Total of exposure time",
"component", "`tidy_zeroinfl()`", "`component` argument passed to `tidy_zeroinfl()`"
) %>%