Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Miscellaneous fixes for {cfr} #129

Merged
merged 4 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 40 additions & 7 deletions R/cfr_rolling.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#'
#' @details When delay correction is applied by passing a delay distribution
#' density function to `delay_density`, the internal function
#' [estimate_severity()] is used to calculate the rolling severity.
#' [.estimate_severity()] is used to calculate the rolling severity.
#'
#' Note that in the naive method the severity estimate and confidence intervals
#' cannot be calculated for days on which the cumulative number of cases since
Expand Down Expand Up @@ -45,7 +45,13 @@
cfr_rolling <- function(data,
delay_density = NULL,
poisson_threshold = 100) {
# input checking
# Add message to indicate this is a utility function
message(
"`cfr_rolling()` is a convenience function to help understand how",
" additional data influences the overall (static) severity.",
" Use `cfr_time_varying()` instead to estimate severity changes over",
" the course of the outbreak."
)
# input checking
checkmate::assert_data_frame(
data,
Expand All @@ -59,6 +65,7 @@ cfr_rolling <- function(data,
# check for any NAs among data
checkmate::assert_data_frame(
data[, c("date", "cases", "deaths")],
types = c("Date", "integerish"),
any.missing = FALSE
)
# check that data$date is a date column
Expand All @@ -80,6 +87,14 @@ cfr_rolling <- function(data,
cumulative_cases <- cumsum(data$cases)
cumulative_deaths <- cumsum(data$deaths)

# Check cumulative sums for count type
checkmate::assert_integerish(cumulative_cases, lower = 0)
# use assert_number to set upper limit at total_cases
checkmate::assert_integerish(
cumulative_deaths,
upper = max(cumulative_cases), lower = 0
)

if (!is.null(delay_density)) {
# calculating the total number of cases and deaths after correcting for
# the number of cases with estimated outcomes and using this estimate as the
Expand All @@ -91,14 +106,32 @@ cfr_rolling <- function(data,

cumulative_outcomes <- cumsum(data$estimated_outcomes)

# Get direct estimates of daily p, throw message if some p are < 1e-4
# NOTE: choosing message rather than warning, as warnings are nearly
# guaranteed in the early stages of an outbreak due to poor data
p_mid_values <- cumulative_deaths / round(cumulative_outcomes)

if (any(is.infinite(p_mid_values) | p_mid_values < 1e-4)) {
message(
"Some daily ratios of total deaths to total cases with known outcome",
" are below 0.01%: some CFR estimates may be unreliable.",
call. = FALSE
)
}

# generate series of CFR estimates with expanding time window
severity_estimates <- Map(
cumulative_cases, cumulative_deaths, cumulative_outcomes,
f = estimate_severity, poisson_threshold = poisson_threshold
# Suppress method choice messages to prevent spamming user.
severity_estimates <- suppressMessages(
Map(
cumulative_cases, cumulative_deaths, cumulative_outcomes, p_mid_values,
f = function(cases, deaths, outcomes, p_mid) {
.estimate_severity(cases, deaths, outcomes, poisson_threshold, p_mid)
}
)
)

# bind list elements together
severity_estimates <- do.call(rbind, severity_estimates)
# bind list elements together; list to matrix to data.frame
severity_estimates <- as.data.frame(do.call(rbind, severity_estimates))
} else {
# check for good indices
indices <- which(
Expand Down
46 changes: 37 additions & 9 deletions R/cfr_static.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,12 @@ cfr_static <- function(data,
# check for any NAs among data
checkmate::assert_data_frame(
data[, c("date", "cases", "deaths")],
types = c("Date", "integerish"),
any.missing = FALSE
)
# check that data$date is a date column
checkmate::assert_date(data$date, any.missing = FALSE, all.missing = FALSE)

# check for excessive missing date and throw an error
stopifnot(
"Input data must have sequential dates with none missing or duplicated" =
Expand All @@ -123,6 +125,16 @@ cfr_static <- function(data,
checkmate::assert_count(poisson_threshold, positive = TRUE)

# NOTE: delay_density is checked in estimate_outcomes() if passed and not NULL
# calculating the total number of cases (without correcting) and deaths

# Calculate total cases and deaths to pass to secondary checking
total_cases <- sum(data$cases, na.rm = TRUE)
total_deaths <- sum(data$deaths, na.rm = TRUE)

# Add input checking for total cases and deaths
checkmate::assert_count(total_cases)
# use assert_number to set upper limit at total_cases
checkmate::assert_number(total_deaths, upper = total_cases, lower = 0)

# apply delay correction if a delay distribution is provided
if (!is.null(delay_density)) {
Expand All @@ -134,19 +146,35 @@ cfr_static <- function(data,
delay_density = delay_density
)

# calculate total cases, deaths, and outcomes
total_outcomes <- sum(df_corrected$estimated_outcomes, na.rm = TRUE)

# NOTE: previous code used `u_t = total_outcomes / total_cases`
# which can be simplified in all operations to simply `total_outcomes`

# Get direct estimate of p, and throw warning if p < 1e-4
p_mid <- total_deaths / round(total_outcomes)

if (p_mid < 1e-4) {
warning(
"Ratio of total deaths to total cases with known outcome",
" is below 0.01%: CFR estimates may be unreliable.",
call. = FALSE
)
}

# calculating the maximum likelihood estimate and 95% confidence interval
# using the binomial likelihood function from Nishiura
severity_estimate <- estimate_severity(
total_cases = sum(df_corrected$cases, na.rm = TRUE),
total_deaths = sum(df_corrected$deaths, na.rm = TRUE),
total_outcomes = sum(df_corrected$estimated_outcomes, na.rm = TRUE),
poisson_threshold = poisson_threshold
severity_estimate <- .estimate_severity(
total_cases = total_cases,
total_deaths = total_deaths,
total_outcomes = total_outcomes,
poisson_threshold = poisson_threshold,
p_mid = p_mid
)
# .estimate_severity() returns vector; convert to list and then data.frame
severity_estimate <- as.data.frame(as.list(severity_estimate))
} else {
# calculating the total number of cases (without correcting) and deaths
total_cases <- sum(data$cases, na.rm = TRUE)
total_deaths <- sum(data$deaths, na.rm = TRUE)

# calculating the central estimate
severity_mean <- total_deaths / total_cases

Expand Down
132 changes: 99 additions & 33 deletions R/estimate_severity.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#' to the total number of cases.
#' @param total_outcomes The total number of outcomes expected to be observed
#' over the period of an outbreak of interest. See [estimate_outcomes()].
#' @param p_mid The initial severity estimate, which is used to determine the
#' likelihood approximation used when `total_cases` > `poisson_threshold`.
#' Defaults to `total_deaths / round(total_outcomes)`.
#' @keywords internal
#' @return A `<data.frame>` with one row and three columns for the maximum
#' likelihood estimate and 95% confidence interval of the corrected severity
Expand All @@ -37,61 +40,124 @@
#' `total_cases < poisson_threshold` the confidence intervals cannot be
#' calculated and are returned as `NA`s while the severity estimate is returned
#' as `0.999`.
estimate_severity <- function(total_cases,
total_deaths,
total_outcomes,
poisson_threshold = 100) {
# Add input checking for single numbers
checkmate::assert_count(total_cases)
# use assert_number to set upper limit at total_cases
checkmate::assert_number(total_deaths, upper = total_cases, lower = 0)
# expect that the estimated number of outcomes is greater
# need not be integer-like, need not be limited by cases or deaths
checkmate::assert_number(total_outcomes, lower = 0, finite = TRUE)
checkmate::assert_count(poisson_threshold, positive = TRUE)
.estimate_severity <- function(total_cases,
total_deaths,
total_outcomes,
poisson_threshold,
p_mid = total_deaths / round(total_outcomes)) {
# NOTE: no input checking on internal fn

# check for special case where any two of cases, deaths, and outcomes are zero
# NOTE: total_cases needed only here
if (sum(c(total_cases, total_deaths, total_outcomes) == 0) >= 2) {
return(
data.frame(
c(
severity_mean = NA_real_,
severity_low = NA_real_,
severity_high = NA_real_
)
)
}

# calculating the proportion of cases with known outcome
u_t <- total_outcomes / total_cases
# NOTE: previous code used `u_t = total_outcomes / total_cases`
# which can be simplified in all operations to simply `total_outcomes`

# select likelihood function
func_likelihood <- .select_func_likelihood(
total_cases, poisson_threshold, p_mid
)

# maximum likelihood estimation for corrected severity
pprange <- seq(from = 1e-3, to = 1.0, by = 1e-3)
pprange <- seq(from = 1e-4, to = 1.0, by = 1e-4)

# Calculate likelihood - use binomial for small samples and Poisson
# approximation for larger numbers
if (total_cases < poisson_threshold) {
lik <- lchoose(round(u_t * total_cases), total_deaths) +
(total_deaths * log(pprange)) +
(((u_t * total_cases) - total_deaths) * log(1.0 - pprange))
} else {
lik <- stats::dpois(
total_deaths, pprange * round(u_t * total_cases),
log = TRUE
)
}
# get likelihoods using selected function
lik <- func_likelihood(total_outcomes, total_deaths, pprange)

# maximum likelihood estimate - if this is empty, return NA
severity_mean <- pprange[which.max(lik)]

# 95% confidence interval of likelihood
severity_lims <- range(pprange[lik >= (max(lik) - 1.92)])

severity_estimate <- data.frame(
severity_mean = severity_mean,
severity_low = severity_lims[[1]],
severity_high = severity_lims[[2]]
)
# return a vector for easy conversion to data
severity_estimate <- c(severity_mean, severity_lims)
names(severity_estimate) <- sprintf("severity_%s", c("mean", "low", "high"))

# returning vector with corrected estimates
severity_estimate
}

#' @title Select a likelihood function for severity estimation
#' @description
#' Switches between Binomial, Poisson, and Normal approximation based on the
#' total number of cases and an initial estimate of the severity.
#'
#' @param total_cases A single count for the total number of cases in the
#' outbreak.
#' @param poisson_threshold A single count for the threshold of cases above
#' which a Poisson or Normal approximation is returned.
#' @param p_mid A single positive number bounded 0 -- 1, representing an
#' initial estimate of the severity, which is used to determine whether a
#' Poisson or Normal approximation is returned.
#' @details
#' Returns a likelihood function as follows:
#'
#' - Binomial approximation: when `total_cases < poisson_threshold`,
#' used when there are few cases, such as in a small outbreak;
#'
#' - Poisson approximation: when `total_cases >= poisson_threshold` but
#' when `p_mid` < 0.05;
#'
#' - Normal approximation: when `total_cases >= poisson_threshold` and
#' `p_mid >=` 0.05.
#'
#' @return A function with three arguments, `total_outcomes`, `total_deaths`,
#' and `pp`, which is used to generate the profile likelihood.
#' Also prints messages to the screen when a Poisson or Normal approximation
#' function is returned.
#' @keywords internal
.select_func_likelihood <- function(total_cases, poisson_threshold, p_mid) {
  # NOTE: internal function, inputs are validated by the exported callers.
  # Chooses the log-likelihood used for the profile over candidate
  # severities `pp`, based on outbreak size and an initial severity estimate:
  # - Binomial: small outbreaks (total_cases < poisson_threshold);
  # - Poisson approximation: large outbreaks with rare deaths (p_mid < 0.05);
  # - Normal approximation: large outbreaks with common deaths (p_mid >= 0.05).
  # The returned function takes (total_outcomes, total_deaths, pp); note that
  # `total_outcomes` is an estimated count and need not be integer-like, hence
  # the round() before it is used as a count.
  if (total_cases < poisson_threshold) {
    # Binomial log-likelihood written out explicitly so that non-integer
    # `total_outcomes` can be used in the success/failure terms
    func_likelihood <- function(total_outcomes, total_deaths, pp) {
      lchoose(round(total_outcomes), total_deaths) +
        (total_deaths * log(pp)) +
        ((total_outcomes - total_deaths) * log(1.0 - pp))
    }
  } else if (p_mid < 0.05) {
    # Poisson approximation to the binomial: appropriate when the event
    # probability is small and the number of trials is large
    func_likelihood <- function(total_outcomes, total_deaths, pp) {
      stats::dpois(
        total_deaths, pp * round(total_outcomes),
        log = TRUE
      )
    }
    message(
      "Total cases = ", total_cases, " and p = ", signif(p_mid, 3),
      ": using Poisson approximation to binomial likelihood."
    )
  } else {
    # Normal approximation to Binomial(N, p): mean N * p and variance
    # N * p * (1 - p). dnorm() expects the standard deviation, so take the
    # square root of the variance (previous code passed the variance as `sd`).
    func_likelihood <- function(total_outcomes, total_deaths, pp) {
      stats::dnorm(
        total_deaths,
        mean = pp * round(total_outcomes),
        sd = sqrt(pp * (1 - pp) * round(total_outcomes)),
        log = TRUE
      )
    }
    message(
      "Total cases = ", total_cases, " and p = ", signif(p_mid, 3),
      ": using Normal approximation to binomial likelihood."
    )
  }

  func_likelihood
}
2 changes: 1 addition & 1 deletion man/cfr_rolling.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions man/estimate_severity.Rd → man/dot-estimate_severity.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading