From 5552f707f9a0553a74a3fc750d4252ff8ae2b3c6 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 12:36:09 -0700 Subject: [PATCH 01/65] submodels --- NAMESPACE | 7 ++ R/Param_base.R | 9 +- R/submodels.R | 134 +++++++++++++++++++++- man/generate_loss_function_from_family.Rd | 14 +++ man/generate_submodel_from_family.Rd | 14 +++ man/loss_function_least_squares.Rd | 25 ++++ man/loss_function_loglik_binomial.Rd | 25 ++++ man/loss_function_poisson.Rd | 20 ++++ man/submodel_exp.Rd | 18 +++ man/submodel_linear.Rd | 18 +++ tmle3.Rproj | 5 + 11 files changed, 285 insertions(+), 4 deletions(-) create mode 100644 man/generate_loss_function_from_family.Rd create mode 100644 man/generate_submodel_from_family.Rd create mode 100644 man/loss_function_least_squares.Rd create mode 100644 man/loss_function_loglik_binomial.Rd create mode 100644 man/loss_function_poisson.Rd create mode 100644 man/submodel_exp.Rd create mode 100644 man/submodel_linear.Rd diff --git a/NAMESPACE b/NAMESPACE index a61e1e34..cf12f945 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,12 @@ export(delta_param_RR) export(density_formula) export(discretize_variable) export(fit_tmle3) +export(generate_loss_function_from_family) +export(generate_submodel_from_family) export(get_propensity_scores) +export(loss_function_least_squares) +export(loss_function_loglik_binomial) +export(loss_function_poisson) export(make_CF_Likelihood) export(make_Likelihood) export(make_tmle3_Task) @@ -46,6 +51,8 @@ export(point_tx_task) export(process_missing) export(propensity_score_plot) export(propensity_score_table) +export(submodel_exp) +export(submodel_linear) export(submodel_logit) export(summary_from_estimates) export(survival_tx_likelihood) diff --git a/R/Param_base.R b/R/Param_base.R index 4fbb5311..5c02fde9 100644 --- a/R/Param_base.R +++ b/R/Param_base.R @@ -50,6 +50,9 @@ Param_base <- R6Class( }, print = function() { cat(sprintf("%s: %s\n", class(self)[1], self$name)) + }, + supports_submodel = function(submodel_name) { + return(submodel_name %in% c(private$.submodel)) } ), active = list( @@ -71,6 +74,9 @@ Param_base <- R6Class( }, targeted = function() { return(private$.targeted) + }, + submodel = function() { + return(private$.submodel) } ), private = list( @@ -78,7 +84,8 @@ Param_base <- R6Class( .observed_likelihood = NULL, .outcome_node = NULL, .targeted = TRUE, - .supports_outcome_censoring = FALSE + .supports_outcome_censoring = FALSE, + .submodel = "logistic" ) ) diff --git a/R/submodels.R b/R/submodels.R index 3a7f60f7..56cfbf79 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -1,3 +1,19 @@ +#' Generate Fluctuation Submodel from \code{family} object. +#' +#' @param family ... +#' +#' @export +# +generate_submodel_from_family <- function(family) { + linkfun <- family$linkfun + linkinv <- family$linkinv + submodel <- function(eps, X, offset) { + linkinv(linkfun(offset) + X %*% eps) + } + return(submodel) +} + + #' Logistic Submodel Fluctuation #' #' @param eps ... @@ -8,7 +24,119 @@ #' #' @export # -submodel_logit <- function(eps, X, offset) { - preds <- stats::plogis(stats::qlogis(offset) + X %*% eps) - return(preds) +submodel_logit <- generate_submodel_from_family(binomial()) + +#' Log likelihood loss for binary variables +#' +#' @param estimate ... +#' @param observed ... +#' @param weights ... +#' @param v ... +#' @export +loss_function_loglik_binomial = function(estimate, observed, weights = NULL, likelihood = NULL) { + loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) + if(!is.null(weights)) { + loss <- weights * loss + } + return(loss) +} + +#' Linear (gaussian) Submodel Fluctuation +#' +#' @param eps ... +#' @param X ... +#' @param offset ... +#' +#' +#' @export +# +submodel_linear <- generate_submodel_from_family(gaussian()) +#' Least-squares loss for binary variables +#' +#' @param estimate ... +#' @param observed ... +#' @param weights ... +#' @param likelihood ... +#' @export +loss_function_least_squares = function(estimate, observed, weights = NULL, likelihood = NULL) { + loss <- (observed - estimate)^2 + if(!is.null(weights)) { + loss <- weights * loss + } + return(loss) +} + + +#' Log-linear (Poisson) Submodel Fluctuation +#' +#' @param eps ... +#' @param X ... +#' @param offset ... +#' +#' +#' @export +# +submodel_exp <- generate_submodel_from_family(poisson()) + +#' Poisson/log-linear loss for nonnegative variables +#' +#' @param estimate ... +#' @param observed ... +#' @param weights ... +#' @param likelihood ... +#' @export +loss_function_poisson = function(estimate, observed, weights = NULL, likelihood = NULL) { + loss <- estimate - observed * log(estimate) + if(!is.null(weights)) { + loss <- weights * loss + } + return(loss) +} + +#' Generate loss function loss from family object or string +#' @param family ... +#' @export +generate_loss_function_from_family <- function(family) { + if(!is.character(family)) { + family <- family$family + } + if(!(family %in% c("poisson", "gaussian", "binomial"))){ + stop("Unsupported family object.") + } + if(family == "poisson"){ + return(loss_function_poisson) + } else if(family == "gaussian"){ + return(loss_function_least_squares) + } else if(family == "binomial"){ + return(loss_function_loglik_binomial) + } +} +make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, risk_function = NULL) { + if(is.null(submodel_function) && inherits(submodel_function, "family")) { + submodel_function <- generate_submodel_from_family(submodel_function) + } else if(is.null(submodel_function) && !is.null(family)) { + submodel_function <- generate_submodel_from_family(family) + } + if(is.null(risk_function) && inherits(risk_function, "family")) { + generate_loss_function_from_family(risk_function) + } else if(is.null(risk_function) && !is.null(family)) { + risk_function <- generate_loss_function_from_family(family) + } + return(list(name = name, family = family, submodel_function = submodel_function, risk_function = risk_function)) +} + + +get_submodel_spec <- function(name) { + output <- NULL + tryCatch({ + family <- get(name) + output <- make_submodel_spec(name, family) + }, error = function(...) { + try({output <<- get(paste0("submodel_spec_",name))}) + }) + if(is.null(output)) { + stop(paste0("Argument name was not a valid family nor was `submodel_spec_", name, "` found in the environment.")) + } + return(output) } + diff --git a/man/generate_loss_function_from_family.Rd b/man/generate_loss_function_from_family.Rd new file mode 100644 index 00000000..2c186bdc --- /dev/null +++ b/man/generate_loss_function_from_family.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{generate_loss_function_from_family} +\alias{generate_loss_function_from_family} +\title{Generate loss function loss from family object or string} +\usage{ +generate_loss_function_from_family(family) +} +\arguments{ +\item{family}{...} +} +\description{ +Generate loss function loss from family object or string +} diff --git a/man/generate_submodel_from_family.Rd b/man/generate_submodel_from_family.Rd new file mode 100644 index 00000000..ad1165ef --- /dev/null +++ b/man/generate_submodel_from_family.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{generate_submodel_from_family} +\alias{generate_submodel_from_family} +\title{Generate Fluctuation Submodel from \code{family} object.} +\usage{ +generate_submodel_from_family(family) +} +\arguments{ +\item{family}{...} +} +\description{ +Generate Fluctuation Submodel from \code{family} object. +} diff --git a/man/loss_function_least_squares.Rd b/man/loss_function_least_squares.Rd new file mode 100644 index 00000000..9b58ae63 --- /dev/null +++ b/man/loss_function_least_squares.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{loss_function_least_squares} +\alias{loss_function_least_squares} +\title{Least-squares loss for binary variables} +\usage{ +loss_function_least_squares( + estimate, + observed, + weights = NULL, + likelihood = NULL +) +} +\arguments{ +\item{estimate}{...} + +\item{observed}{...} + +\item{weights}{...} + +\item{likelihood}{...} +} +\description{ +Least-squares loss for binary variables +} diff --git a/man/loss_function_loglik_binomial.Rd b/man/loss_function_loglik_binomial.Rd new file mode 100644 index 00000000..323b5846 --- /dev/null +++ b/man/loss_function_loglik_binomial.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{loss_function_loglik_binomial} +\alias{loss_function_loglik_binomial} +\title{Log likelihood loss for binary variables} +\usage{ +loss_function_loglik_binomial( + estimate, + observed, + weights = NULL, + likelihood = NULL +) +} +\arguments{ +\item{estimate}{...} + +\item{observed}{...} + +\item{weights}{...} + +\item{v}{...} +} +\description{ +Log likelihood loss for binary variables +} diff --git a/man/loss_function_poisson.Rd b/man/loss_function_poisson.Rd new file mode 100644 index 00000000..bff6f165 --- /dev/null +++ b/man/loss_function_poisson.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{loss_function_poisson} +\alias{loss_function_poisson} +\title{Poisson/log-linear loss for nonnegative variables} +\usage{ +loss_function_poisson(estimate, observed, weights = NULL, likelihood = NULL) +} +\arguments{ +\item{estimate}{...} + +\item{observed}{...} + +\item{weights}{...} + +\item{likelihood}{...} +} +\description{ +Poisson/log-linear loss for nonnegative variables +} diff --git a/man/submodel_exp.Rd b/man/submodel_exp.Rd new file mode 100644 index 00000000..b392259e --- /dev/null +++ b/man/submodel_exp.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{submodel_exp} +\alias{submodel_exp} +\title{Log-linear (Poisson) Submodel Fluctuation} +\usage{ +submodel_exp(eps, X, offset) +} +\arguments{ +\item{eps}{...} + +\item{X}{...} + +\item{offset}{...} +} +\description{ +Log-linear (Poisson) Submodel Fluctuation +} diff --git a/man/submodel_linear.Rd b/man/submodel_linear.Rd new file mode 100644 index 00000000..98cfe486 --- /dev/null +++ b/man/submodel_linear.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{submodel_linear} +\alias{submodel_linear} +\title{Linear (gaussian) Submodel Fluctuation} +\usage{ +submodel_linear(eps, X, offset) +} +\arguments{ +\item{eps}{...} + +\item{X}{...} + +\item{offset}{...} +} +\description{ +Linear (gaussian) Submodel Fluctuation +} diff --git a/tmle3.Rproj b/tmle3.Rproj index d848a9ff..cba1b6b7 100644 --- a/tmle3.Rproj +++ b/tmle3.Rproj @@ -5,8 +5,13 @@ SaveWorkspace: No AlwaysSaveHistory: Default EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 Encoding: UTF-8 +RnwWeave: Sweave +LaTeX: pdfLaTeX + AutoAppendNewline: Yes StripTrailingWhitespace: Yes From 15d0089350939bbfb233fc46ff76d459ae95936a Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 12:47:45 -0700 Subject: [PATCH 02/65] submodels --- NAMESPACE | 4 +++- R/Param_base.R | 5 ++++- R/submodels.R | 22 +++++++++++++++++++--- man/submodel_logit.Rd | 18 ------------------ 4 files changed, 26 insertions(+), 23 deletions(-) delete mode 100644 man/submodel_logit.Rd diff --git a/NAMESPACE b/NAMESPACE index cf12f945..b0f34ab5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,11 +38,13 @@ export(fit_tmle3) export(generate_loss_function_from_family) export(generate_submodel_from_family) export(get_propensity_scores) +export(get_submodel_spec) export(loss_function_least_squares) export(loss_function_loglik_binomial) export(loss_function_poisson) export(make_CF_Likelihood) export(make_Likelihood) +export(make_submodel_spec) export(make_tmle3_Task) export(plot_vim) export(point_tx_likelihood) @@ -53,7 +55,7 @@ export(propensity_score_plot) export(propensity_score_table) export(submodel_exp) export(submodel_linear) -export(submodel_logit) +export(submodel_logistic) export(summary_from_estimates) export(survival_tx_likelihood) export(survival_tx_npsem) diff --git a/R/Param_base.R b/R/Param_base.R index 5c02fde9..2a00fa67 100644 --- a/R/Param_base.R +++ b/R/Param_base.R @@ -53,6 +53,9 @@ Param_base <- R6Class( }, supports_submodel = function(submodel_name) { return(submodel_name %in% c(private$.submodel)) + }, + get_submodel_spec = function() { + return(get_submodel_spec(private$.submodel)) } ), active = list( @@ -85,7 +88,7 @@ Param_base <- R6Class( .outcome_node = NULL, .targeted = TRUE, .supports_outcome_censoring = FALSE, - .submodel = "logistic" + .submodel = "binomial_logit" ) ) diff --git a/R/submodels.R b/R/submodels.R index 56cfbf79..22359ab7 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -24,7 +24,7 @@ generate_submodel_from_family <- function(family) { #' #' @export # -submodel_logit <- generate_submodel_from_family(binomial()) +submodel_logistic <- generate_submodel_from_family(binomial()) #' Log likelihood loss for binary variables #' @@ -111,6 +111,11 @@ generate_loss_function_from_family <- function(family) { return(loss_function_loglik_binomial) } } + + +#' Main maker of submodel specs. +#' @param name ... +#' @export make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, risk_function = NULL) { if(is.null(submodel_function) && inherits(submodel_function, "family")) { submodel_function <- generate_submodel_from_family(submodel_function) @@ -125,11 +130,22 @@ make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, r return(list(name = name, family = family, submodel_function = submodel_function, risk_function = risk_function)) } - +#' Main getter for submodel specs. +#' @param name Either a name for submodel spec or a family object. +#' @export get_submodel_spec <- function(name) { output <- NULL tryCatch({ - family <- get(name) + if(inherits(name, "family")) { + family <- name + } else { + split_names <- unlist(strsplit(name, "_")) + if(length(split_names)==2) { + family <- get(split_names[1])(link = split_names[2]) + } else { + family <- get(split_names[1])() + } + } output <- make_submodel_spec(name, family) }, error = function(...) { try({output <<- get(paste0("submodel_spec_",name))}) diff --git a/man/submodel_logit.Rd b/man/submodel_logit.Rd deleted file mode 100644 index bdba8739..00000000 --- a/man/submodel_logit.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/submodels.R -\name{submodel_logit} -\alias{submodel_logit} -\title{Logistic Submodel Fluctuation} -\usage{ -submodel_logit(eps, X, offset) -} -\arguments{ -\item{eps}{...} - -\item{X}{...} - -\item{offset}{...} -} -\description{ -Logistic Submodel Fluctuation -} From 8d86c5383c73299fe85e437dedf083d9768c132f Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 13:37:56 -0700 Subject: [PATCH 03/65] supports general submodels. Tests pass --- R/Param_base.R | 10 ++--- R/submodels.R | 22 +++++------ R/tmle3_Update.R | 59 +++++++++++++++++++++------- man/get_submodel_spec.Rd | 11 ++++++ man/loss_function_least_squares.Rd | 4 +- man/loss_function_loglik_binomial.Rd | 4 +- man/loss_function_poisson.Rd | 9 ++++- man/make_submodel_spec.Rd | 19 +++++++++ man/submodel_exp.Rd | 6 +-- man/submodel_linear.Rd | 6 +-- man/submodel_logistic.Rd | 18 +++++++++ tests/testthat/test-ATE.R | 2 + 12 files changed, 131 insertions(+), 39 deletions(-) create mode 100644 man/get_submodel_spec.Rd create mode 100644 man/make_submodel_spec.Rd create mode 100644 man/submodel_logistic.Rd diff --git a/R/Param_base.R b/R/Param_base.R index 2a00fa67..0e83c0f1 100644 --- a/R/Param_base.R +++ b/R/Param_base.R @@ -51,11 +51,11 @@ Param_base <- R6Class( print = function() { cat(sprintf("%s: %s\n", class(self)[1], self$name)) }, - supports_submodel = function(submodel_name) { - return(submodel_name %in% c(private$.submodel)) + supports_submodel = function(submodel_name, node) { + return(submodel_name %in% c(private$.submodel[[node]])) }, - get_submodel_spec = function() { - return(get_submodel_spec(private$.submodel)) + get_submodel_spec = function(update_node) { + return(get_submodel_spec(private$.submodel[[update_node]])) } ), active = list( @@ -88,7 +88,7 @@ Param_base <- R6Class( .outcome_node = NULL, .targeted = TRUE, .supports_outcome_censoring = FALSE, - .submodel = "binomial_logit" + .submodel = list("A" = "binomial_logit", "Y" = "binomial_logit") ) ) diff --git a/R/submodels.R b/R/submodels.R index 22359ab7..6b3c0f57 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -7,7 +7,7 @@ generate_submodel_from_family <- function(family) { linkfun <- family$linkfun linkinv <- family$linkinv - submodel <- function(eps, X, offset) { + submodel <- function(eps, offset, X) { linkinv(linkfun(offset) + X %*% eps) } return(submodel) @@ -33,7 +33,7 @@ submodel_logistic <- generate_submodel_from_family(binomial()) #' @param weights ... #' @param v ... #' @export -loss_function_loglik_binomial = function(estimate, observed, weights = NULL, likelihood = NULL) { +loss_function_loglik_binomial = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) if(!is.null(weights)) { loss <- weights * loss @@ -58,7 +58,7 @@ submodel_linear <- generate_submodel_from_family(gaussian()) #' @param weights ... #' @param likelihood ... #' @export -loss_function_least_squares = function(estimate, observed, weights = NULL, likelihood = NULL) { +loss_function_least_squares = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { loss <- (observed - estimate)^2 if(!is.null(weights)) { loss <- weights * loss @@ -85,7 +85,7 @@ submodel_exp <- generate_submodel_from_family(poisson()) #' @param weights ... #' @param likelihood ... #' @export -loss_function_poisson = function(estimate, observed, weights = NULL, likelihood = NULL) { +loss_function_poisson = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { loss <- estimate - observed * log(estimate) if(!is.null(weights)) { loss <- weights * loss @@ -116,22 +116,22 @@ generate_loss_function_from_family <- function(family) { #' Main maker of submodel specs. #' @param name ... #' @export -make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, risk_function = NULL) { +make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, loss_function = NULL) { if(is.null(submodel_function) && inherits(submodel_function, "family")) { submodel_function <- generate_submodel_from_family(submodel_function) } else if(is.null(submodel_function) && !is.null(family)) { submodel_function <- generate_submodel_from_family(family) } - if(is.null(risk_function) && inherits(risk_function, "family")) { - generate_loss_function_from_family(risk_function) - } else if(is.null(risk_function) && !is.null(family)) { - risk_function <- generate_loss_function_from_family(family) + if(is.null(loss_function) && inherits(loss_function, "family")) { + generate_loss_function_from_family(loss_function) + } else if(is.null(loss_function) && !is.null(family)) { + loss_function <- generate_loss_function_from_family(family) } - return(list(name = name, family = family, submodel_function = submodel_function, risk_function = risk_function)) + return(list(name = name, family = family, submodel_function = submodel_function, loss_function = loss_function)) } #' Main getter for submodel specs. -#' @param name Either a name for submodel spec or a family object. +#' @param name Either a name for a submodel spec obtainable from environment (name --> get(paste0("submodel_spec_",name))}), a family object or string, or a string of the form "family_link" (e.g. "binomial_logit"). #' @export get_submodel_spec <- function(name) { output <- NULL diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index c319ffb7..a14b199a 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -114,6 +114,17 @@ tmle3_Update <- R6Class( update_node = "Y", drop_censored = FALSE) { + # USE first parameter to get submodel spec + submodel_spec <- self$tmle_params[[1]]$get_submodel_spec(update_node) + submodel_name <- submodel_spec$name + # Check compatibility of tmle_params with submodel + lapply(self$tmle_params, function(tmle_param) { + if(update_node %in% tmle_param$update_nodes ) { + if(!(tmle_param$supports_submodel(submodel_name, update_node))){ + stop(paste0("Incompatible parameter-specific submodel specs for update node: Parameter `", tmle_param$name, "`` does not support the submodel `", submodel_name, "` for update node `", update_node, "`.")) + } + } + }) # TODO: change clever covariates to allow only calculating some nodes clever_covariates <- lapply(self$tmle_params, function(tmle_param) { tmle_param$clever_covariates(tmle_task, fold_number) @@ -161,9 +172,27 @@ tmle3_Update <- R6Class( } } + submodel_data$submodel_spec <- submodel_spec + # To support arbitrary likelihood-dependent risk functions for updating. + # Is carrying this stuff around a problem computationally? + submodel_data$tmle_task <- tmle_task + submodel_data$likelihood <- likelihood + submodel_data$fold_number <- fold_number + return(submodel_data) }, fit_submodel = function(submodel_data) { + # Extract submodel spec info + submodel_spec <- submodel_data$submodel_spec + family_object <- submodel_spec$family + loss_function <- submodel_spec$loss_function + submodel <- submodel_spec$submodel_function + training_likelihood <- submodel_data$likelihood + training_task <- submodel_data$tmle_task + training_fold <- submodel_data$fold_number + # Subset to only numericals needed for fitting. + submodel_data <- submodel_data[c("observed", "H", "initial")] + if (self$constrain_step) { ncol_H <- ncol(submodel_data$H) if (!(is.null(ncol_H) || (ncol_H == 1))) { @@ -175,8 +204,8 @@ tmle3_Update <- R6Class( risk <- function(epsilon) { - submodel_estimate <- self$apply_submodel(submodel_data, epsilon) - loss <- self$loss_function(submodel_estimate, submodel_data$observed) + submodel_estimate <- self$apply_submodel(submodel, submodel_data, epsilon) + loss <- loss_function(submodel_estimate, submodel_data$observed, training_likelihood, training_task, training_fold) mean(loss) } @@ -192,8 +221,8 @@ tmle3_Update <- R6Class( epsilon <- self$delta_epsilon } - risk_val <- risk(epsilon) - risk_zero <- risk(0) + #risk_val <- risk(epsilon) + #risk_zero <- risk(0) # # TODO: consider if we should do this # if(risk_zero Date: Sun, 5 Sep 2021 13:46:00 -0700 Subject: [PATCH 04/65] supports general submodel and weights. JK on previous commit --- R/tmle3_Update.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index a14b199a..9fe765b1 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -147,6 +147,7 @@ tmle3_Update <- R6Class( # scale observed and predicted values for bounded continuous observed <- tmle_task$scale(observed, update_node) initial <- tmle_task$scale(initial, update_node) + weights <- tmle_task$get_regression_task(update_node)$weights # protect against qlogis(1)=Inf @@ -155,7 +156,8 @@ tmle3_Update <- R6Class( submodel_data <- list( observed = observed, H = covariates_dt, - initial = initial + initial = initial, + weights = weights ) @@ -167,7 +169,8 @@ tmle3_Update <- R6Class( submodel_data <- list( observed = submodel_data$observed[subset], H = submodel_data$H[subset, , drop = FALSE], - initial = submodel_data$initial[subset] + initial = submodel_data$initial[subset], + weights = submodel_data$weights[subset] ) } } @@ -191,7 +194,7 @@ tmle3_Update <- R6Class( training_task <- submodel_data$tmle_task training_fold <- submodel_data$fold_number # Subset to only numericals needed for fitting. - submodel_data <- submodel_data[c("observed", "H", "initial")] + submodel_data <- submodel_data[c("observed", "H", "initial", "weights")] if (self$constrain_step) { ncol_H <- ncol(submodel_data$H) @@ -205,7 +208,7 @@ tmle3_Update <- R6Class( risk <- function(epsilon) { submodel_estimate <- self$apply_submodel(submodel, submodel_data, epsilon) - loss <- loss_function(submodel_estimate, submodel_data$observed, training_likelihood, training_task, training_fold) + loss <- loss_function(submodel_estimate, submodel_data$observed, weights = submodel_data$weights, likelihood = training_likelihood, tmle_task = training_task, fold_number = training_fold) mean(loss) } @@ -240,6 +243,7 @@ tmle3_Update <- R6Class( submodel_fit <- glm(observed ~ H - 1, submodel_data, offset = family_object$linkfun(submodel_data$initial), family = family_object, + weights = submodel_data$weights, start = rep(0, ncol(submodel_data$H)) ) }) @@ -249,7 +253,7 @@ tmle3_Update <- R6Class( submodel_fit <- glm(observed ~ -1, submodel_data, offset = family_object$linkfun(submodel_data$initial), family = family_object, - weights = as.numeric(H), + weights = as.numeric(H) * submodel_data$weights, start = rep(0, ncol(submodel_data$H)) ) }) @@ -262,6 +266,7 @@ tmle3_Update <- R6Class( submodel_fit <- glm(observed ~ H - 1, submodel_data, offset = family_object$linkfun(submodel_data$initial), family = family_object, + weights = submodel_data$weights, start = rep(0, ncol(submodel_data$H)) ) }) From 97b765e5c291c20517d66f9aa44f4ae901e6241b Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 13:51:54 -0700 Subject: [PATCH 05/65] supports general submodel and weights. JK on previous commit --- R/Param_survival.R | 3 ++- tests/testthat/test-survival.R | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/Param_survival.R b/R/Param_survival.R index 66baf11a..3a9eba9e 100644 --- a/R/Param_survival.R +++ b/R/Param_survival.R @@ -200,6 +200,7 @@ Param_survival <- R6Class( .cf_likelihood = NULL, .supports_outcome_censoring = TRUE, .times = NULL, - .target_times = NULL + .target_times = NULL, + .submodel = list("N" = "binomial_logit") ) ) diff --git a/tests/testthat/test-survival.R b/tests/testthat/test-survival.R index b5df62ce..e8c46121 100644 --- a/tests/testthat/test-survival.R +++ b/tests/testthat/test-survival.R @@ -40,12 +40,11 @@ survival_spec <- tmle_survival( tmle_task <- survival_spec$make_tmle_task(df_long, node_list) initial_likelihood <- survival_spec$make_initial_likelihood(tmle_task, learner_list) -up <- tmle3_Update_survival$new( +up <- tmle3_Update$new( maxit = 3e1, cvtmle = TRUE, convergence_type = "scaled_var", delta_epsilon = 1e-2, - fit_method = "l2", use_best = TRUE, verbose = TRUE ) From 3d252ac129b61cd1567fe5d7b4b68d7f7a5efd53 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 13:59:54 -0700 Subject: [PATCH 06/65] supports general submodel and weights. JK on previous commit --- R/Param_base.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/Param_base.R b/R/Param_base.R index 0e83c0f1..9c4aff9b 100644 --- a/R/Param_base.R +++ b/R/Param_base.R @@ -52,9 +52,15 @@ Param_base <- R6Class( cat(sprintf("%s: %s\n", class(self)[1], self$name)) }, supports_submodel = function(submodel_name, node) { + if(!(node%in% names(private$.submodel))) { + node <- "default" + } return(submodel_name %in% c(private$.submodel[[node]])) }, get_submodel_spec = function(update_node) { + if(!(update_node%in% names(private$.submodel))) { + update_node <- "default" + } return(get_submodel_spec(private$.submodel[[update_node]])) } ), @@ -88,7 +94,7 @@ Param_base <- R6Class( .outcome_node = NULL, .targeted = TRUE, .supports_outcome_censoring = FALSE, - .submodel = list("A" = "binomial_logit", "Y" = "binomial_logit") + .submodel = list("A" = "binomial_logit", "Y" = "binomial_logit", "default" = "binomial_logit") ) ) From a7a1ddade43d497aee98a24fed7e43b21292d78a Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 14:18:37 -0700 Subject: [PATCH 07/65] fix ATT and ATC submodels --- NAMESPACE | 4 +++- R/Param_ATC.R | 3 ++- R/Param_ATT.R | 3 ++- R/submodels.R | 22 +++++++++++++++++++--- R/tmle3_Update.R | 4 ++-- man/submodel_exp.Rd | 2 +- man/submodel_linear.Rd | 2 +- man/submodel_logistic.Rd | 18 ------------------ man/submodel_logistic_switch.Rd | 18 ++++++++++++++++++ man/submodel_spec_logistic_switch.Rd | 16 ++++++++++++++++ tests/testthat/test-ATC.R | 1 + 11 files changed, 65 insertions(+), 28 deletions(-) delete mode 100644 man/submodel_logistic.Rd create mode 100644 man/submodel_logistic_switch.Rd create mode 100644 man/submodel_spec_logistic_switch.Rd diff --git a/NAMESPACE b/NAMESPACE index b0f34ab5..579e274d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(generate_submodel_from_family) export(get_propensity_scores) export(get_submodel_spec) export(loss_function_least_squares) +export(loss_function_loglik) export(loss_function_loglik_binomial) export(loss_function_poisson) export(make_CF_Likelihood) @@ -55,7 +56,8 @@ export(propensity_score_plot) export(propensity_score_table) export(submodel_exp) export(submodel_linear) -export(submodel_logistic) +export(submodel_logistic_switch) +export(submodel_spec_logistic_switch) export(summary_from_estimates) export(survival_tx_likelihood) export(survival_tx_npsem) diff --git a/R/Param_ATC.R b/R/Param_ATC.R index 0bdbc681..41cc4114 100644 --- a/R/Param_ATC.R +++ b/R/Param_ATC.R @@ -103,6 +103,7 @@ Param_ATC <- R6Class( .param_att = NULL, .outcome_node = NULL, .cf_likelihood_treatment = NULL, - .cf_likelihood_control = NULL + .cf_likelihood_control = NULL, + .submodel = list("A" = "logistic_switch", "Y" = "binomial_logit") ) ) diff --git a/R/Param_ATT.R b/R/Param_ATT.R index 5a13de2f..42b79cb9 100644 --- a/R/Param_ATT.R +++ b/R/Param_ATT.R @@ -154,6 +154,7 @@ Param_ATT <- R6Class( private = list( .type = "ATT", .cf_likelihood_treatment = NULL, - .cf_likelihood_control = NULL + .cf_likelihood_control = NULL, + .submodel = list("A" = "logistic_switch", "Y" = "binomial_logit") ) ) diff --git a/R/submodels.R b/R/submodels.R index 6b3c0f57..7d72ca12 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -7,14 +7,14 @@ generate_submodel_from_family <- function(family) { linkfun <- family$linkfun linkinv <- family$linkinv - submodel <- function(eps, offset, X) { + submodel <- function(eps, offset, X, observed) { linkinv(linkfun(offset) + X %*% eps) } return(submodel) } -#' Logistic Submodel Fluctuation +#' Logistic Submodel Fluctuation for likelihood (not conditional means) #' #' @param eps ... #' @param X ... @@ -24,7 +24,11 @@ generate_submodel_from_family <- function(family) { #' #' @export # -submodel_logistic <- generate_submodel_from_family(binomial()) +submodel_logistic_switch <- function(eps, offset, X, observed) { + offset <- ifelse(observed==1, offset, 1-offset) + output <- stats::plogis(stats::qlogis(offset) + X %*% eps) + output <- ifelse(observed==1, output, 1-output) +} #' Log likelihood loss for binary variables #' @@ -40,6 +44,14 @@ loss_function_loglik_binomial = function(estimate, observed, weights = NULL, lik } return(loss) } +#' @export +loss_function_loglik = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { + loss <- -1 * log(estimate) + if(!is.null(weights)) { + loss <- weights * loss + } + return(loss) +} #' Linear (gaussian) Submodel Fluctuation #' @@ -156,3 +168,7 @@ get_submodel_spec <- function(name) { return(output) } +#' Submodel for binary outcomes where "initial" is a likelihood and not a conditional mean (e.g. for Param_ATC and Param_ATT for updating node `A`). +#' @export +submodel_spec_logistic_switch <- list(name = "logistic_switch", family = function(){stop("Does not support family-based updating. Please use optim instead.")}, submodel_function = submodel_logistic_switch, loss_function = loss_function_loglik) + diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 9fe765b1..58594e9b 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -286,14 +286,14 @@ tmle3_Update <- R6Class( return(epsilon) }, - submodel = function(epsilon, initial, H) { + submodel = function(epsilon, initial, H, observed) { plogis(qlogis(initial) + H %*% epsilon) }, loss_function = function(estimate, observed) { -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) }, apply_submodel = function(submodel, submodel_data, epsilon) { - submodel(epsilon, submodel_data$initial, submodel_data$H) + submodel(epsilon, submodel_data$initial, submodel_data$H, submodel_data$observed) }, apply_update = function(tmle_task, likelihood, fold_number, new_epsilon, update_node) { diff --git a/man/submodel_exp.Rd b/man/submodel_exp.Rd index e665f266..9aa79177 100644 --- a/man/submodel_exp.Rd +++ b/man/submodel_exp.Rd @@ -4,7 +4,7 @@ \alias{submodel_exp} \title{Log-linear (Poisson) Submodel Fluctuation} \usage{ -submodel_exp(eps, offset, X) +submodel_exp(eps, offset, X, observed) } \arguments{ \item{eps}{...} diff --git a/man/submodel_linear.Rd b/man/submodel_linear.Rd index a3600e1b..f6e2f8ca 100644 --- a/man/submodel_linear.Rd +++ b/man/submodel_linear.Rd @@ -4,7 +4,7 @@ \alias{submodel_linear} \title{Linear (gaussian) Submodel Fluctuation} \usage{ -submodel_linear(eps, offset, X) +submodel_linear(eps, offset, X, observed) } \arguments{ \item{eps}{...} diff --git a/man/submodel_logistic.Rd b/man/submodel_logistic.Rd deleted file mode 100644 index ee641da5..00000000 --- a/man/submodel_logistic.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/submodels.R -\name{submodel_logistic} -\alias{submodel_logistic} -\title{Logistic Submodel Fluctuation} -\usage{ -submodel_logistic(eps, offset, X) -} -\arguments{ -\item{eps}{...} - -\item{offset}{...} - -\item{X}{...} -} -\description{ -Logistic Submodel Fluctuation -} diff --git a/man/submodel_logistic_switch.Rd b/man/submodel_logistic_switch.Rd new file mode 100644 index 00000000..79df8dce --- /dev/null +++ b/man/submodel_logistic_switch.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{submodel_logistic_switch} +\alias{submodel_logistic_switch} +\title{Logistic Submodel Fluctuation for likelihood (not conditional means)} +\usage{ +submodel_logistic_switch(eps, offset, X, observed) +} +\arguments{ +\item{eps}{...} + +\item{offset}{...} + +\item{X}{...} +} +\description{ +Logistic Submodel Fluctuation for likelihood (not conditional means) +} diff --git a/man/submodel_spec_logistic_switch.Rd b/man/submodel_spec_logistic_switch.Rd new file mode 100644 index 00000000..d77754b3 --- /dev/null +++ b/man/submodel_spec_logistic_switch.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\docType{data} +\name{submodel_spec_logistic_switch} +\alias{submodel_spec_logistic_switch} +\title{Submodel for binary outcomes where "initial" is a likelihood and not a conditional mean (e.g. for Param_ATC and Param_ATT for updating node \code{A}).} +\format{ +An object of class \code{list} of length 4. +} +\usage{ +submodel_spec_logistic_switch +} +\description{ +Submodel for binary outcomes where "initial" is a likelihood and not a conditional mean (e.g. for Param_ATC and Param_ATT for updating node \code{A}). +} +\keyword{datasets} diff --git a/tests/testthat/test-ATC.R b/tests/testthat/test-ATC.R index b29711ba..c5efbc36 100644 --- a/tests/testthat/test-ATC.R +++ b/tests/testthat/test-ATC.R @@ -62,6 +62,7 @@ tmle_params <- tmle_spec$make_params(tmle_task, targeted_likelihood) updater$tmle_params <- tmle_params atc <- tmle_params[[1]] + # fit tmle update tmle_fit <- fit_tmle3( tmle_task, targeted_likelihood, list(atc), updater From 742f1c9e909a5258dba620cff078028fde485cd1 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 16:22:47 -0700 Subject: [PATCH 08/65] REFERENCE CHANGES HERE --- NAMESPACE | 3 + R/LF_known.R | 3 +- R/Lrnr_glm_semiparametric.R | 158 +++++++++++++++++ R/Param_spCATE.R | 161 ++++++++++++++++++ ...nt_helpers.R => helpers_point_treatment.R} | 28 ++- R/helpers_semiparametric.R | 25 +++ R/{survival_helpers.R => helpers_survival.R} | 0 R/submodels_semiparametric.R | 9 + man/Lrnr_glm_semiparametric.Rd | 26 +++ man/Param_ATC.Rd | 1 + man/Param_ATE.Rd | 1 + man/Param_ATT.Rd | 1 + man/Param_MSM.Rd | 1 + man/Param_TSM.Rd | 1 + man/Param_base.Rd | 1 + man/Param_delta.Rd | 1 + man/Param_mean.Rd | 1 + man/Param_spCATE.Rd | 63 +++++++ man/Param_stratified.Rd | 1 + man/Param_survival.Rd | 1 + man/define_param.Rd | 1 + man/point_tx.Rd | 17 +- man/survival_tx.Rd | 2 +- man/tmle3_Fit.Rd | 1 + vignettes/testing.Rmd | 50 ++++++ 25 files changed, 548 insertions(+), 9 deletions(-) create mode 100644 R/Lrnr_glm_semiparametric.R create mode 100644 R/Param_spCATE.R rename R/{point_treatment_helpers.R => helpers_point_treatment.R} (70%) create mode 100644 R/helpers_semiparametric.R rename R/{survival_helpers.R => helpers_survival.R} (100%) create mode 100644 R/submodels_semiparametric.R create mode 100644 man/Lrnr_glm_semiparametric.Rd create mode 100644 man/Param_spCATE.Rd create mode 100644 vignettes/testing.Rmd diff --git a/NAMESPACE b/NAMESPACE index 579e274d..b91d5d45 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(LF_static) export(LF_targeted) export(Likelihood) export(Likelihood_cache) +export(Lrnr_glm_semiparametric) export(Param_ATC) export(Param_ATE) export(Param_ATT) @@ -19,6 +20,7 @@ export(Param_TSM) export(Param_base) export(Param_delta) export(Param_mean) +export(Param_spCATE) export(Param_stratified) export(Param_survival) export(Targeted_Likelihood) @@ -112,6 +114,7 @@ importFrom(sl3,make_learner) importFrom(sl3,sl3_Task) importFrom(stats,aggregate) importFrom(stats,cov) +importFrom(stats,family) importFrom(stats,glm) importFrom(stats,median) importFrom(stats,p.adjust) diff --git a/R/LF_known.R b/R/LF_known.R index d5b15fdd..ffec8665 100644 --- a/R/LF_known.R +++ b/R/LF_known.R @@ -41,10 +41,11 @@ LF_known <- R6Class( class = TRUE, inherit = LF_base, public = list( - initialize = function(name, mean_fun = stub_known, density_fun = stub_known, ..., type = "density") { + initialize = function(name, mean_fun = stub_known, density_fun = stub_known, base_likelihood = NULL, ..., type = "density") { super$initialize(name, ..., type = type) private$.mean_fun <- mean_fun private$.density_fun <- density_fun + private$.base_likelihood <- base_likelihood }, get_mean = function(tmle_task, fold_number) { learner_task <- tmle_task$get_regression_task(self$name, scale = FALSE) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R new file mode 100644 index 00000000..c09ce74b --- /dev/null +++ b/R/Lrnr_glm_semiparametric.R @@ -0,0 +1,158 @@ +#' Semiparametric Generalized Linear Models +#' +#' This learner provides fitting procedures for generalized linear models using +#' \code{\link[stats]{glm.fit}}. +#' +#' @docType class +#' +#' @importFrom R6 R6Class +#' @importFrom stats glm predict family +#' +#' @export +#' +#' @keywords data +#' +#' @return Learner object with methods for training and prediction. See +#' \code{\link{Lrnr_base}} for documentation on learners. +#' +#' @format \code{\link{R6Class}} object. +#' +#' @family Learners +#' +#' @section Parameters: +#' \describe{ +#' \item{\code{...}}{Parameters passed to \code{\link[stats]{glm}}.} +#' } +#' +# +Lrnr_glm_semiparametric <- R6Class( + classname = "Lrnr_glm_semiparametric", inherit = Lrnr_base, + portable = TRUE, class = TRUE, + public = list( + initialize = function(formula_sp, lrnr_baseline, interaction_variable = "A", family = NULL, append_interaction_matrix = TRUE, return_matrix_predictions = F, ...) { + params <- args_to_list() + super$initialize(params = params, ...) + } + ), + + private = list( + .properties = c("continuous", "binomial", "weights", "offset"), + + .train = function(task) { + + args <- self$params + append_interaction_matrix <- args$append_interaction_matrix + outcome_type <- self$get_outcome_type(task) + trt <- args$interaction_variable + if(is.null(trt)) { + A <- rep(1, task$nrow) + } else { + A <- unlist(task$get_data(,trt)) + } + if(!all(A %in% c(0,1)) && !is.null(trt)) { + binary <- FALSE + } else { + binary <- TRUE + } + family <- args$family + lrnr_baseline <- args$lrnr_baseline + formula <- args$formula_sp + if (is.null(family)) { + family <- outcome_type$glm_family(return_object = TRUE) + } + # Interaction design matrix + Y <- task$Y + V <- model.matrix(formula, task$data) + colnames(V) <- paste0("V", 1:ncol(V)) + + covariates <- setdiff(task$nodes$covariates, trt) + + if(!append_interaction_matrix && binary) { + task_baseline <- task$next_in_chain(covariates = covariates) + lrnr_baseline <- lrnr_baseline$train(task_baseline[A==0]) + Q0 <- lrnr_baseline$predict(task_baseline) + beta <- coef(glm.fit(A*V, Y, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family)) + Q1 <- family$linkinv(family$linkfun(Q0) + V%*%beta) + Q <- ifelse(A==1, Q1, Q0) + } else { + + covariates <- setdiff(task$nodes$covariates, trt) + + if(append_interaction_matrix) { + AV <- as.data.table(A*V) + X <- cbind(task$X[,covariates, with = F], AV) + X0 <- cbind(task$X[,covariates, with = F], 0*V) + } else { + X <- cbind(task$X[,covariates, with = F], A) + X0 <- cbind(task$X[,covariates, with = F], A*0) + } + + + column_names <- task$add_columns(X) + task_baseline <- task$next_in_chain(covariates = colnames(X), column_names = column_names ) + + column_names <- task$add_columns(X0) + task_baseline0 <- task$next_in_chain(covariates = colnames(X0), column_names = column_names ) + + lrnr_baseline <- lrnr_baseline$train(task_baseline) + Q <- lrnr_baseline$predict(task_baseline) + Q0 <- lrnr_baseline$predict(task_baseline0) + # Project onto model + + beta <- coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family)) + + } + + fit_object = list(beta = beta, lrnr_baseline = lrnr_baseline, covariates = covariates, family = family, formula = formula, + append_interaction_matrix = append_interaction_matrix, binary = binary, task_baseline = task_baseline) + return(fit_object) + }, + .predict = function(task) { + fit_object <- self$fit_object + append_interaction_matrix <- fit_object$append_interaction_matrix + binary <- fit_object$binary + beta <- fit_object$beta + lrnr_baseline <- fit_object$lrnr_baseline + covariates <- fit_object$covariates + family <- fit_object$family + formula <- fit_object$formula + + trt <- self$params$interaction_variable + if(is.null(trt)) { + A <- rep(1, task$nrow) + } else { + A <- unlist(task$get_data(,trt)) + } + V <- model.matrix(formula, task$data) + colnames(V) <- paste0("V", 1:ncol(V)) + + + if(!append_interaction_matrix && binary) { + task_baseline <- task$next_in_chain(covariates = covariates) + Q0 <- lrnr_baseline$predict(task_baseline) + } else { + if(append_interaction_matrix) { + X0 <- cbind(task$X[,covariates, with = F], 0*V) + } else { + X0 <- cbind(task$X[,covariates, with = F], 0) + } + column_names <- task$add_columns(X0) + task_baseline0 <- task$next_in_chain(covariates = colnames(X0), column_names = column_names ) + Q0 <- lrnr_baseline$predict(task_baseline0) + } + + Q1 <- family$linkinv(family$linkfun(Q0) + V%*%beta) + Q <- family$linkinv(family$linkfun(Q0) + A*V%*%beta) + if(self$params$return_matrix_predictions && binary) { + predictions <- cbind(Q0,Q1,Q) + colnames(predictions) <- c("A=0", "A=1", "A") + predictions <- sl3::pack_predictions(cbind(Q0,Q1)) + } else { + predictions <- Q + } + + + return(predictions) + } + ) +) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R new file mode 100644 index 00000000..e95126a4 --- /dev/null +++ b/R/Param_spCATE.R @@ -0,0 +1,161 @@ +#' Average Treatment Effect +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_spCATE <- R6Class( + classname = "Param_spCATE", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + weights <- tmle_task$weights + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "A", fold_number) + g0 <- self$cf_likelihood_control$get_likelihoods(tmle_task, "A", fold_number) + Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q0 <- Q_packed[[1]] + Q1 <- Q_packed[[2]] + Q <- Q_packed[[3]] + beta <- get_beta(W, A, formula, Q1, Q0, family = gaussian(), weights = weights) + + HA_treatment <- cf_pA_treatment / pA + HA_control <- cf_pA_control / pA + + # collapse across multiple intervention nodes + if (!is.null(ncol(HA_treatment)) && ncol(HA_treatment) > 1) { + HA_treatment <- apply(HA_treatment, 1, prod) + } + + # collapse across multiple intervention nodes + if (!is.null(ncol(HA_control)) && ncol(HA_control) > 1) { + HA_control <- apply(HA_control, 1, prod) + } + + HA <- HA_treatment - HA_control + + HA <- bound(HA, c(-40, 40)) + return(list(Y = HA)) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + # clever_covariates happen here (for this param) only, but this is repeated computation + HA <- self$clever_covariates(tmle_task, fold_number)[[self$outcome_node]] + + + # todo: make sure we support updating these params + pA <- self$observed_likelihood$get_likelihoods(tmle_task, intervention_nodes, fold_number) + cf_pA_treatment <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, intervention_nodes, fold_number) + cf_pA_control <- self$cf_likelihood_control$get_likelihoods(tmle_task, intervention_nodes, fold_number) + + # todo: extend for stochastic + cf_task_treatment <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task_control <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + Y <- tmle_task$get_tmle_node(self$outcome_node, impute_censoring = TRUE) + + EY <- self$observed_likelihood$get_likelihood(tmle_task, self$outcome_node, fold_number) + EY1 <- self$observed_likelihood$get_likelihood(cf_task_treatment, self$outcome_node, fold_number) + EY0 <- self$observed_likelihood$get_likelihood(cf_task_control, self$outcome_node, fold_number) + + psi <- mean(EY1 - EY0) + + IC <- HA * (Y - EY) + (EY1 - EY0) - psi + + result <- list(psi = psi, IC = IC) + return(result) + } + ), + active = list( + name = function() { + param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + } + ), + private = list( + .type = "ATE", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE + ) +) diff --git a/R/point_treatment_helpers.R b/R/helpers_point_treatment.R similarity index 70% rename from R/point_treatment_helpers.R rename to R/helpers_point_treatment.R index 2f297bf3..68b69adb 100644 --- a/R/point_treatment_helpers.R +++ b/R/helpers_point_treatment.R @@ -10,23 +10,26 @@ #' @param ... extra arguments. #' @export #' @rdname point_tx -point_tx_npsem <- function(node_list, variable_types = NULL) { +point_tx_npsem <- function(node_list, variable_types = NULL, scale_outcome = TRUE, include_variance_node = FALSE) { # make tmle_task npsem <- list( define_node("W", node_list$W, variable_type = variable_types$W), define_node("A", node_list$A, c("W"), variable_type = variable_types$A), - define_node("Y", node_list$Y, c("A", "W"), variable_type = variable_types$Y, scale = TRUE) + define_node("Y", node_list$Y, c("A", "W"), variable_type = variable_types$Y, scale = scale_outcome) ) + if(include_variance_node) { + npsem$var_Y <- define_node("var_Y", node_list$Y, c("A", "W"), variable_type = variable_types$var_Y, scale = FALSE) + } return(npsem) } #' @export #' @rdname point_tx -point_tx_task <- function(data, node_list, variable_types = NULL, ...) { +point_tx_task <- function(data, node_list, variable_types = NULL, scale_outcome = TRUE, ...) { setDT(data) - npsem <- point_tx_npsem(node_list, variable_types) + npsem <- point_tx_npsem(node_list, variable_types, scale_outcome) if (!is.null(node_list$id)) { tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, ...) @@ -82,5 +85,22 @@ point_tx_likelihood <- function(tmle_task, learner_list) { likelihood_def <- Likelihood$new(factor_list) likelihood <- likelihood_def$train(tmle_task) + + if("var_Y" %in% names(tmle_task$npsem)) { + task_generator <- function(tmle_task, base_likelihood) { + EY <- sl3::unpack_predictions(base_likelihood$get_likelihood(tmle_task, "Y")) + EY <- EY[, ncol(EY)] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + outcome <- (Y-EY)^2 + task <- tmle_task$get_regression_task("Y") + column_names <- task$add_columns(data.table("var_Y" = outcome)) + task <- task$next_in_chain(outcome = "var_Y", column_names = column_names ) + } + if(tmle_task$npsem[["Y"]]$variable_type == "binomial") { + LF_known$new("var_Y", , type = "mean") + } else { + LF_derived$new("var_Y", learner_list[["var_Y"]], likelihood, task_generator = task_generator , type = "mean") + } + } return(likelihood) } diff --git a/R/helpers_semiparametric.R b/R/helpers_semiparametric.R new file mode 100644 index 00000000..061a1496 --- /dev/null +++ b/R/helpers_semiparametric.R @@ -0,0 +1,25 @@ + +get_beta <- function(W, A, formula, Q1, Q0, family, weights = NULL) { + W <- as.matrix(W) + if(is.null(weights)) { + weights <- rep(1, nrow(W)) + } + V <- model.matrix(formula, as.data.frame(W)) + Q <- ifelse(A==1, Q1, Q0) + beta <- coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), family = family, intercept = F, weights = weights)) + return(beta) +} + +project_onto_model <- function(W, A, formula, Q1, Q0, family, weights = NULL) { + beta <- get_beta(W, A, formula, Q1, Q0, family, weights) + V <- model.matrix(formula, as.data.frame(W)) + Q1 <- family$linkinv(family$linkfun(Q0) + V %*% beta) + Q <- ifelse(A==1, Q1, Q0) + return(cbind(Q0,Q1, Q)) +} + + + + + + diff --git a/R/survival_helpers.R b/R/helpers_survival.R similarity index 100% rename from R/survival_helpers.R rename to R/helpers_survival.R diff --git a/R/submodels_semiparametric.R b/R/submodels_semiparametric.R new file mode 100644 index 00000000..6be3f486 --- /dev/null +++ b/R/submodels_semiparametric.R @@ -0,0 +1,9 @@ + + +submodel_function <- function(eps, offset, X, observed) { + offset_unpacked <- sl3::unpack_predictions(offset) + Q0 <- offset_unpacked[[1]] + Q1 <- offset_unpacked[[2]] + Q <- offset_unpacked[[3]] + +} diff --git a/man/Lrnr_glm_semiparametric.Rd b/man/Lrnr_glm_semiparametric.Rd new file mode 100644 index 00000000..7f556c41 --- /dev/null +++ b/man/Lrnr_glm_semiparametric.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Lrnr_glm_semiparametric.R +\docType{class} +\name{Lrnr_glm_semiparametric} +\alias{Lrnr_glm_semiparametric} +\title{Semiparametric Generalized Linear Models} +\format{ +\code{\link{R6Class}} object. +} +\value{ +Learner object with methods for training and prediction. See +\code{\link{Lrnr_base}} for documentation on learners. +} +\description{ +This learner provides fitting procedures for generalized linear models using +\code{\link[stats]{glm.fit}}. +} +\section{Parameters}{ + +\describe{ +\item{\code{...}}{Parameters passed to \code{\link[stats]{glm}}.} +} +} + +\concept{Learners} +\keyword{data} diff --git a/man/Param_ATC.Rd b/man/Param_ATC.Rd index ee9d2a0c..94212aff 100644 --- a/man/Param_ATC.Rd +++ b/man/Param_ATC.Rd @@ -63,6 +63,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_ATE.Rd b/man/Param_ATE.Rd index 0550f82e..259e8c8d 100644 --- a/man/Param_ATE.Rd +++ b/man/Param_ATE.Rd @@ -53,6 +53,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_ATT.Rd b/man/Param_ATT.Rd index fa77e4f7..b129e98b 100644 --- a/man/Param_ATT.Rd +++ b/man/Param_ATT.Rd @@ -63,6 +63,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_MSM.Rd b/man/Param_MSM.Rd index 9086688c..a754fb9c 100644 --- a/man/Param_MSM.Rd +++ b/man/Param_MSM.Rd @@ -61,6 +61,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_TSM.Rd b/man/Param_TSM.Rd index fb1c5360..50092923 100644 --- a/man/Param_TSM.Rd +++ b/man/Param_TSM.Rd @@ -57,6 +57,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_base.Rd b/man/Param_base.Rd index 2dbed3da..4c6b8fd2 100644 --- a/man/Param_base.Rd +++ b/man/Param_base.Rd @@ -71,6 +71,7 @@ Other Parameters: \code{\link{Param_TSM}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_delta.Rd b/man/Param_delta.Rd index 54e5aac0..50a31c93 100644 --- a/man/Param_delta.Rd +++ b/man/Param_delta.Rd @@ -18,6 +18,7 @@ Other Parameters: \code{\link{Param_TSM}}, \code{\link{Param_base}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_mean.Rd b/man/Param_mean.Rd index 91e7c288..570e4e90 100644 --- a/man/Param_mean.Rd +++ b/man/Param_mean.Rd @@ -46,6 +46,7 @@ Other Parameters: \code{\link{Param_TSM}}, \code{\link{Param_base}}, \code{\link{Param_delta}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd new file mode 100644 index 00000000..bf5dabb5 --- /dev/null +++ b/man/Param_spCATE.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_spCATE.R +\name{Param_spCATE} +\alias{Param_spCATE} +\title{Average Treatment Effect} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_stratified.Rd b/man/Param_stratified.Rd index bb54b268..8764afcb 100644 --- a/man/Param_stratified.Rd +++ b/man/Param_stratified.Rd @@ -57,6 +57,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, \code{\link{tmle3_Fit}} diff --git a/man/Param_survival.Rd b/man/Param_survival.Rd index 45ea0ceb..0e5c195e 100644 --- a/man/Param_survival.Rd +++ b/man/Param_survival.Rd @@ -48,6 +48,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{define_param}()}, \code{\link{tmle3_Fit}} diff --git a/man/define_param.Rd b/man/define_param.Rd index 720aed88..b874ecd5 100644 --- a/man/define_param.Rd +++ b/man/define_param.Rd @@ -24,6 +24,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{tmle3_Fit}} diff --git a/man/point_tx.Rd b/man/point_tx.Rd index 046298b3..4d817830 100644 --- a/man/point_tx.Rd +++ b/man/point_tx.Rd @@ -1,14 +1,25 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/point_treatment_helpers.R +% Please edit documentation in R/helpers_point_treatment.R \name{point_tx_npsem} \alias{point_tx_npsem} \alias{point_tx_task} \alias{point_tx_likelihood} \title{Helper Functions for Point Treatment} \usage{ -point_tx_npsem(node_list, variable_types = NULL) +point_tx_npsem( + node_list, + variable_types = NULL, + scale_outcome = TRUE, + include_variance_node = FALSE +) -point_tx_task(data, node_list, variable_types = NULL, ...) +point_tx_task( + data, + node_list, + variable_types = NULL, + scale_outcome = TRUE, + ... +) point_tx_likelihood(tmle_task, learner_list) } diff --git a/man/survival_tx.Rd b/man/survival_tx.Rd index 79587b5d..5630e78a 100644 --- a/man/survival_tx.Rd +++ b/man/survival_tx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/survival_helpers.R +% Please edit documentation in R/helpers_survival.R \name{survival_tx_npsem} \alias{survival_tx_npsem} \alias{survival_tx_task} diff --git a/man/tmle3_Fit.Rd b/man/tmle3_Fit.Rd index dc0d218e..838a9091 100644 --- a/man/tmle3_Fit.Rd +++ b/man/tmle3_Fit.Rd @@ -105,6 +105,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()} diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd new file mode 100644 index 00000000..4b8ef07e --- /dev/null +++ b/vignettes/testing.Rmd @@ -0,0 +1,50 @@ +--- +title: "test" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## R Markdown + + +```{r} +library(sl3) +n <- 200 +W <- runif(n, -1, 1) +A <- rbinom(n, size = 1, prob = plogis(W)) +Y <- rnorm(n, mean = A+W, sd = 0.5) +data <- data.table(W,A,Y) +lrnr_Y0W <- Lrnr_glm$new() +lrnr_A <- Lrnr_glm$new() +lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1, lrnr_Y0W, interaction_variable = "A", family = gaussian(), return_matrix_predictions = TRUE) + +node_list <- list (W = "W", A = "A", Y= "Y") + +tmle_task <- point_tx_task(data, node_list, scale_outcome = F) + +tmle_task$get_tmle_node("Y", format = T) + +learner_list <- list(Y = lrnr_sp, A = lrnr_A) +likelihood <- point_tx_likelihood(tmle_task, learner_list) +unpacked <- (sl3::unpack_predictions(likelihood$get_likelihood(tmle_task, "Y" ))) +quantile(apply(unpacked,1,diff)) +``` + + + + +```{r} +data <- data.table(W,A,Y) +head(data) + +task <- sl3_Task$new(data,covariates = c("W" , "A"), outcome = "Y") +lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1, Lrnr_glmnet$new(), family = binomial()) +lrnr_sp <- lrnr_sp$train(task) +lrnr_sp$fit_object$beta +lrnr_sp$fit_object$lrnr_baseline$fit_object$coefficients +data.table(lrnr_sp$predict(task), Lrnr_glm$new()$train(task)$predict(task)) + +```` From 64583b0ed3e5385b72ac38221dd240d6ef470bc2 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 17:22:36 -0700 Subject: [PATCH 09/65] towards spCATE --- R/LF_known.R | 18 +++++-- R/Lrnr_glm_semiparametric.R | 2 +- R/Param_base.R | 3 ++ R/Param_spCATE.R | 98 ++++++++++++++++++++----------------- R/helpers_point_treatment.R | 38 +++++++++----- R/helpers_semiparametric.R | 3 ++ R/tmle3_Task.R | 2 +- R/tmle3_Update.R | 31 +++++++++++- man/point_tx.Rd | 1 + vignettes/testing.Rmd | 18 +++++-- 10 files changed, 145 insertions(+), 69 deletions(-) diff --git a/R/LF_known.R b/R/LF_known.R index ffec8665..7a241042 100644 --- a/R/LF_known.R +++ b/R/LF_known.R @@ -49,13 +49,21 @@ LF_known <- R6Class( }, get_mean = function(tmle_task, fold_number) { learner_task <- tmle_task$get_regression_task(self$name, scale = FALSE) - preds <- self$mean_fun(learner_task) + if(!is.null(self$base_likelihood)) { + preds <- self$mean_fun(learner_task, tmle_task, self$base_likelihood) + } else { + preds <- self$mean_fun(learner_task) + } return(preds) }, get_density = function(tmle_task, fold_number) { learner_task <- tmle_task$get_regression_task(self$name, scale = FALSE) - preds <- self$density_fun(learner_task) + if(!is.null(self$base_likelihood)) { + preds <- self$density_fun(learner_task, tmle_task, self$base_likelihood) + } else { + preds <- self$density_fun(learner_task) + } outcome_type <- learner_task$outcome_type observed <- outcome_type$format(learner_task$Y) @@ -80,11 +88,15 @@ LF_known <- R6Class( density_fun = function() { return(private$.density_fun) + }, + base_likelihood = function(){ + return(private$.base_likelihood) } ), private = list( .name = NULL, .mean_fun = NULL, - .density_fun = NULL + .density_fun = NULL, + .base_likelihood = NULL ) ) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index c09ce74b..bc17b269 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -146,7 +146,7 @@ Lrnr_glm_semiparametric <- R6Class( if(self$params$return_matrix_predictions && binary) { predictions <- cbind(Q0,Q1,Q) colnames(predictions) <- c("A=0", "A=1", "A") - predictions <- sl3::pack_predictions(cbind(Q0,Q1)) + predictions <- sl3::pack_predictions(predictions) } else { predictions <- Q } diff --git a/R/Param_base.R b/R/Param_base.R index 9c4aff9b..2d84f5dc 100644 --- a/R/Param_base.R +++ b/R/Param_base.R @@ -86,6 +86,9 @@ Param_base <- R6Class( }, submodel = function() { return(private$.submodel) + }, + weights = function(){ + return(self$observed_likelihood$training_task$weights) } ), private = list( diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index e95126a4..85847b41 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -46,7 +46,7 @@ Param_spCATE <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATE =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -55,79 +55,82 @@ Param_spCATE <- R6Class( intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) intervention_list_control <- c(intervention_list_control, censoring_intervention) } - + private$.formula_CATE <- formula_CATE private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, clever_covariates = function(tmle_task = NULL, fold_number = "full") { + + + training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { - tmle_task <- self$observed_likelihood$training_task + tmle_task <- training_task + } + if(training_task$uuid == tmle_task$uuid){ + is_training_task <- TRUE } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_CATE, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] - weights <- tmle_task$weights + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) - g1 <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "A", fold_number) - g0 <- self$cf_likelihood_control$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A==1, g, 1-g) + g0 <- 1-g1 Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) Q0 <- Q_packed[[1]] Q1 <- Q_packed[[2]] Q <- Q_packed[[3]] - beta <- get_beta(W, A, formula, Q1, Q0, family = gaussian(), weights = weights) - - HA_treatment <- cf_pA_treatment / pA - HA_control <- cf_pA_control / pA - - # collapse across multiple intervention nodes - if (!is.null(ncol(HA_treatment)) && ncol(HA_treatment) > 1) { - HA_treatment <- apply(HA_treatment, 1, prod) - } - - # collapse across multiple intervention nodes - if (!is.null(ncol(HA_control)) && ncol(HA_control) > 1) { - HA_control <- apply(HA_control, 1, prod) + #Extract current semiparametric coef + beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) + # Get conditional variances + var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) + var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) + var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) + + gradM <- V + num <- gradM * ( g1/var_Y1) + denom <- (g0/ var_Y0 + g1/var_Y1) + hstar <- - num/denom + H <- (A*gradM + hstar) /var_Y + EIF <- as.matrix(H * (Y-Q)) + + # Store EIF component + if(is_training_task) { + EIF_Y <- self$weights * as.matrix(H * (Y-Q)) + } else { + EIF_Y <- NULL } - HA <- HA_treatment - HA_control - - HA <- bound(HA, c(-40, 40)) - return(list(Y = HA)) + return(list(Y = H, EIF = list(Y = EIF_Y))) }, estimates = function(tmle_task = NULL, fold_number = "full") { if (is.null(tmle_task)) { tmle_task <- self$observed_likelihood$training_task } - intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] # clever_covariates happen here (for this param) only, but this is repeated computation - HA <- self$clever_covariates(tmle_task, fold_number)[[self$outcome_node]] - - - # todo: make sure we support updating these params - pA <- self$observed_likelihood$get_likelihoods(tmle_task, intervention_nodes, fold_number) - cf_pA_treatment <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, intervention_nodes, fold_number) - cf_pA_control <- self$cf_likelihood_control$get_likelihoods(tmle_task, intervention_nodes, fold_number) - - # todo: extend for stochastic - cf_task_treatment <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] - cf_task_control <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] - - Y <- tmle_task$get_tmle_node(self$outcome_node, impute_censoring = TRUE) + EIF <- self$clever_covariates(tmle_task, fold_number)$EIF$Y - EY <- self$observed_likelihood$get_likelihood(tmle_task, self$outcome_node, fold_number) - EY1 <- self$observed_likelihood$get_likelihood(cf_task_treatment, self$outcome_node, fold_number) - EY0 <- self$observed_likelihood$get_likelihood(cf_task_control, self$outcome_node, fold_number) - - psi <- mean(EY1 - EY0) + Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q0 <- Q_packed[[1]] + Q1 <- Q_packed[[2]] + Q <- Q_packed[[3]] + beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) + CATE <- Q1 - Q0 - IC <- HA * (Y - EY) + (EY1 - EY0) - psi + IC <- EIF - result <- list(psi = psi, IC = IC) + result <- list(psi = beta, IC = IC, CATE = CATE) return(result) } ), @@ -150,12 +153,17 @@ Param_spCATE <- R6Class( }, update_nodes = function() { return(c(self$outcome_node)) + }, + formula_CATE = function(){ + return(private$.formula_CATE) } ), private = list( .type = "ATE", .cf_likelihood_treatment = NULL, .cf_likelihood_control = NULL, - .supports_outcome_censoring = TRUE + .supports_outcome_censoring = TRUE, + .formula_CATE = NULL, + .submodel = list(Y = "gaussian_linear") ) ) diff --git a/R/helpers_point_treatment.R b/R/helpers_point_treatment.R index 68b69adb..72f2d0b4 100644 --- a/R/helpers_point_treatment.R +++ b/R/helpers_point_treatment.R @@ -26,10 +26,10 @@ point_tx_npsem <- function(node_list, variable_types = NULL, scale_outcome = TRU #' @export #' @rdname point_tx -point_tx_task <- function(data, node_list, variable_types = NULL, scale_outcome = TRUE, ...) { +point_tx_task <- function(data, node_list, variable_types = NULL, scale_outcome = TRUE, include_variance_node = FALSE, ...) { setDT(data) - npsem <- point_tx_npsem(node_list, variable_types, scale_outcome) + npsem <- point_tx_npsem(node_list, variable_types, scale_outcome, include_variance_node) if (!is.null(node_list$id)) { tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, ...) @@ -86,21 +86,33 @@ point_tx_likelihood <- function(tmle_task, learner_list) { likelihood_def <- Likelihood$new(factor_list) likelihood <- likelihood_def$train(tmle_task) + # If conditional variance needs to be estimated, do so. if("var_Y" %in% names(tmle_task$npsem)) { - task_generator <- function(tmle_task, base_likelihood) { - EY <- sl3::unpack_predictions(base_likelihood$get_likelihood(tmle_task, "Y")) - EY <- EY[, ncol(EY)] - Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] - outcome <- (Y-EY)^2 - task <- tmle_task$get_regression_task("Y") - column_names <- task$add_columns(data.table("var_Y" = outcome)) - task <- task$next_in_chain(outcome = "var_Y", column_names = column_names ) + if(is.null(learner_list[["var_Y"]])) { + learner_list[["var_Y"]] <- Lrnr_glmnet$new(family = "poisson") + warning("Node var_Y is in npsem but no learner is provided in `learner_list`. Defaulting to glmnet with `poisson` family.") } - if(tmle_task$npsem[["Y"]]$variable_type == "binomial") { - LF_known$new("var_Y", , type = "mean") + if(tmle_task$npsem[["Y"]]$variable_type$type == "binomial") { + mean_fun <- function(task, likelihood, tmle_task) { + EY <- sl3::unpack_predictions(likelihood$get_likelihood(tmle_task, "Y")) + EY <- EY[, ncol(EY)] + return(EY * (1-EY)) + } + LF_var_Y <- LF_known$new("var_Y", mean_fun = mean_fun , base_likelihood = likelihood, type = "mean") } else { - LF_derived$new("var_Y", learner_list[["var_Y"]], likelihood, task_generator = task_generator , type = "mean") + task_generator <- function(tmle_task, base_likelihood) { + EY <- sl3::unpack_predictions(base_likelihood$get_likelihood(tmle_task, "Y")) + EY <- EY[, ncol(EY)] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + outcome <- (Y-EY)^2 + task <- tmle_task$get_regression_task("Y") + column_names <- task$add_columns(data.table("var_Y" = outcome)) + task <- task$next_in_chain(outcome = "var_Y", column_names = column_names ) + } + LF_var_Y <- LF_derived$new("var_Y", learner_list[["var_Y"]], likelihood, task_generator = task_generator , type = "mean") } + likelihood$add_factors(LF_var_Y) } + return(likelihood) } diff --git a/R/helpers_semiparametric.R b/R/helpers_semiparametric.R index 061a1496..f1ed7735 100644 --- a/R/helpers_semiparametric.R +++ b/R/helpers_semiparametric.R @@ -23,3 +23,6 @@ project_onto_model <- function(W, A, formula, Q1, Q0, family, weights = NULL) { + + + diff --git a/R/tmle3_Task.R b/R/tmle3_Task.R index cd690bb8..4dbd57e7 100644 --- a/R/tmle3_Task.R +++ b/R/tmle3_Task.R @@ -348,7 +348,7 @@ tmle3_Task <- R6Class( return(private$.npsem) }, data = function() { - all_variables <- unlist(lapply(self$npsem, `[[`, "variables")) + all_variables <- unique(unlist(lapply(self$npsem, `[[`, "variables"))) self$get_data(columns = all_variables) } ), diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 58594e9b..26882fdd 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -134,8 +134,23 @@ tmle3_Update <- R6Class( covariates_dt <- do.call(cbind, node_covariates) if (self$one_dimensional) { - observed_task <- likelihood$training_task - covariates_dt <- self$collapse_covariates(self$current_estimates, covariates_dt) + EIF_components <- NULL + tryCatch({ + EIF_components <-lapply(clever_covariates, function(item) { + item$EIF[[update_node]] + }) + EIF_components <- do.call(cbind, EIF_components) + if(length(EIF_components) ==0 || ncol(EIF_components) != ncol(covariates_dt)) { + stop("Not all params provide EIF components") + } + }, error = function(...){}) + if(is.null(EIF_components)) { + ED <- ED_from_estimates(self$current_estimates) + EDnormed <- ED / norm(ED, type = "2") + } + #covariates_dt <- self$collapse_covariates(self$current_estimates, covariates_dt) + } else { + EDnormed <- NULL } observed <- tmle_task$get_tmle_node(update_node) @@ -175,6 +190,7 @@ tmle3_Update <- R6Class( } } + submodel_data$EDnormed <- EDnormed submodel_data$submodel_spec <- submodel_spec # To support arbitrary likelihood-dependent risk functions for updating. # Is carrying this stuff around a problem computationally? @@ -186,6 +202,14 @@ tmle3_Update <- R6Class( }, fit_submodel = function(submodel_data) { # Extract submodel spec info + EDnormed <- submodel_data$EDnormed + if(!is.null(EDnormed)) { + # Collapse clever covariates + submodel_data$H <- as.matrix(submodel_data$H) %*% EDnormed + } else { + EDnormed <- 1 + } + submodel_spec <- submodel_data$submodel_spec family_object <- submodel_spec$family loss_function <- submodel_spec$loss_function @@ -284,6 +308,9 @@ tmle3_Update <- R6Class( cat(sprintf("(max) epsilon: %e ", max_eps)) } + # Convert univariate epsilon back to multivariate epsilon if needed. + # This is change allows us to store the actual epsilon in each update step (noting that EIF changes each iteration) + epsilon <- epsilon * EDnormed return(epsilon) }, submodel = function(epsilon, initial, H, observed) { diff --git a/man/point_tx.Rd b/man/point_tx.Rd index 4d817830..493d89e3 100644 --- a/man/point_tx.Rd +++ b/man/point_tx.Rd @@ -18,6 +18,7 @@ point_tx_task( node_list, variable_types = NULL, scale_outcome = TRUE, + include_variance_node = FALSE, ... ) diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 4b8ef07e..f0ce6ead 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -17,20 +17,30 @@ W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rnorm(n, mean = A+W, sd = 0.5) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_glm$new() +lrnr_Y0W <- Lrnr_glmnet$new() lrnr_A <- Lrnr_glm$new() lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1, lrnr_Y0W, interaction_variable = "A", family = gaussian(), return_matrix_predictions = TRUE) node_list <- list (W = "W", A = "A", Y= "Y") -tmle_task <- point_tx_task(data, node_list, scale_outcome = F) +tmle_task <- point_tx_task(data, node_list, scale_outcome = F, include_variance_node = TRUE) tmle_task$get_tmle_node("Y", format = T) learner_list <- list(Y = lrnr_sp, A = lrnr_A) likelihood <- point_tx_likelihood(tmle_task, learner_list) -unpacked <- (sl3::unpack_predictions(likelihood$get_likelihood(tmle_task, "Y" ))) -quantile(apply(unpacked,1,diff)) + +updater <- tmle3_Update$new() +tlik <- Targeted_Likelihood$new(likelihood, updater = updater) +param_cate <- Param_spCATE$new(tlik, formula_CATE = ~1, intervention_list_treatment = list(define_lf(LF_static, "A", value = 1)), intervention_list_control = list(define_lf(LF_static, "A", value = 0))) + + +``` + +```{r} +param_cate$formula_CATE +updater <- tlik$updater +updater$update() ``` From 5a80be0c4c37f6425e7da68327fffe023911f3aa Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 17:42:25 -0700 Subject: [PATCH 10/65] implemented spCATE PARAM --- R/Param_spCATE.R | 36 +++++++++++++++++++++++++----------- R/tmle3_Update.R | 21 ++++++++++++++++++--- vignettes/testing.Rmd | 9 ++++----- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index 85847b41..9cce2818 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -68,6 +68,8 @@ Param_spCATE <- R6Class( } if(training_task$uuid == tmle_task$uuid){ is_training_task <- TRUE + } else { + is_training_task <- FALSE } cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] @@ -82,12 +84,15 @@ Param_spCATE <- R6Class( g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) g1 <- ifelse(A==1, g, 1-g) g0 <- 1-g1 - Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) - Q0 <- Q_packed[[1]] - Q1 <- Q_packed[[2]] - Q <- Q_packed[[3]] + #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + #Q0 <- Q_packed[[1]] + #Q1 <- Q_packed[[2]] + #Q <- Q_packed[[3]] + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + #Extract current semiparametric coef - beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) + #print(data.table(Q1,Q0)) + #beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) # Get conditional variances var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) @@ -113,18 +118,27 @@ Param_spCATE <- R6Class( if (is.null(tmle_task)) { tmle_task <- self$observed_likelihood$training_task } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] W <- tmle_task$get_tmle_node("W") A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] - + weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation EIF <- self$clever_covariates(tmle_task, fold_number)$EIF$Y + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qtest <- ifelse(A==1, Q1, Q0) + if(!all(Qtest-Q==0)) { + stop("Q and Q1,Q0 dont match") + } + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] - Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) - Q0 <- Q_packed[[1]] - Q1 <- Q_packed[[2]] - Q <- Q_packed[[3]] beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) CATE <- Q1 - Q0 @@ -164,6 +178,6 @@ Param_spCATE <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_CATE = NULL, - .submodel = list(Y = "gaussian_linear") + .submodel = list(Y = "gaussian_identity") ) ) diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 26882fdd..89ff026c 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -56,7 +56,7 @@ tmle3_Update <- R6Class( fluctuation_type = c("standard", "weighted"), optim_delta_epsilon = TRUE, use_best = FALSE, - verbose = FALSE) { + verbose = FALSE, bounds = list(Y = 1e-5, A=0.005)) { private$.maxit <- maxit private$.cvtmle <- cvtmle private$.one_dimensional <- one_dimensional @@ -135,6 +135,7 @@ tmle3_Update <- R6Class( if (self$one_dimensional) { EIF_components <- NULL + # If EIF components are provided use those instead of the full EIF tryCatch({ EIF_components <-lapply(clever_covariates, function(item) { item$EIF[[update_node]] @@ -166,7 +167,9 @@ tmle3_Update <- R6Class( # protect against qlogis(1)=Inf - initial <- bound(initial, 0.005) + initial <- bound(initial, self$bounds(update_node)) + + submodel_data <- list( observed = observed, @@ -429,6 +432,17 @@ tmle3_Update <- R6Class( private$.update_nodes, new_update_nodes )) + }, + bounds = function(node) { + bounds <- private$.bounds + if(is.numeric(bounds)) { + return(bounds) + } else if(is.null(bounds[[node]])) { + bounds <- 0.005 + } else { + bounds <- bounds[[node]] + } + return(bounds) } ), active = list( @@ -519,6 +533,7 @@ tmle3_Update <- R6Class( .use_best = NULL, .verbose = FALSE, .targeted_components = NULL, - .current_estimates = NULL + .current_estimates = NULL, + .bounds = NULL ) ) diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index f0ce6ead..4661a0a3 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -19,7 +19,7 @@ Y <- rnorm(n, mean = A+W, sd = 0.5) data <- data.table(W,A,Y) lrnr_Y0W <- Lrnr_glmnet$new() lrnr_A <- Lrnr_glm$new() -lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1, lrnr_Y0W, interaction_variable = "A", family = gaussian(), return_matrix_predictions = TRUE) +lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1, lrnr_Y0W, interaction_variable = "A", family = gaussian(), return_matrix_predictions = FALSE) node_list <- list (W = "W", A = "A", Y= "Y") @@ -30,7 +30,7 @@ tmle_task$get_tmle_node("Y", format = T) learner_list <- list(Y = lrnr_sp, A = lrnr_A) likelihood <- point_tx_likelihood(tmle_task, learner_list) -updater <- tmle3_Update$new() +updater <- tmle3_Update$new(verbose = T, bounds = list(Y = c(-Inf, Inf))) tlik <- Targeted_Likelihood$new(likelihood, updater = updater) param_cate <- Param_spCATE$new(tlik, formula_CATE = ~1, intervention_list_treatment = list(define_lf(LF_static, "A", value = 1)), intervention_list_control = list(define_lf(LF_static, "A", value = 0))) @@ -38,9 +38,8 @@ param_cate <- Param_spCATE$new(tlik, formula_CATE = ~1, intervention_list_treat ``` ```{r} -param_cate$formula_CATE -updater <- tlik$updater -updater$update() + +updater$update(tlik, tmle_task) ``` From 870e1a1438663eec0d661d6692e67cba08e012dc Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 18:02:55 -0700 Subject: [PATCH 11/65] added more sp params --- NAMESPACE | 2 + R/Param_spCATE.R | 9 +- R/Param_spOR.R | 180 ++++++++++++++++++++++++++++++++++++++ R/Param_spRR.R | 187 ++++++++++++++++++++++++++++++++++++++++ man/Param_ATC.Rd | 2 + man/Param_ATE.Rd | 2 + man/Param_ATT.Rd | 2 + man/Param_MSM.Rd | 2 + man/Param_TSM.Rd | 2 + man/Param_base.Rd | 2 + man/Param_delta.Rd | 2 + man/Param_mean.Rd | 2 + man/Param_spCATE.Rd | 2 + man/Param_spOR.Rd | 65 ++++++++++++++ man/Param_spRR.Rd | 65 ++++++++++++++ man/Param_stratified.Rd | 2 + man/Param_survival.Rd | 2 + man/define_param.Rd | 2 + man/tmle3_Fit.Rd | 2 + vignettes/testing.Rmd | 6 +- 20 files changed, 534 insertions(+), 6 deletions(-) create mode 100644 R/Param_spOR.R create mode 100644 R/Param_spRR.R create mode 100644 man/Param_spOR.Rd create mode 100644 man/Param_spRR.Rd diff --git a/NAMESPACE b/NAMESPACE index b91d5d45..b42bf58f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,8 @@ export(Param_base) export(Param_delta) export(Param_mean) export(Param_spCATE) +export(Param_spOR) +export(Param_spRR) export(Param_stratified) export(Param_survival) export(Targeted_Likelihood) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index 9cce2818..a0bda88f 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -102,12 +102,13 @@ Param_spCATE <- R6Class( num <- gradM * ( g1/var_Y1) denom <- (g0/ var_Y0 + g1/var_Y1) hstar <- - num/denom - H <- (A*gradM + hstar) /var_Y - EIF <- as.matrix(H * (Y-Q)) + H <- as.matrix((A*gradM + hstar) /var_Y) # Store EIF component if(is_training_task) { - EIF_Y <- self$weights * as.matrix(H * (Y-Q)) + scale <- apply(V,2, function(v) {apply(self$weights*H *(A*v ),2,mean ) }) + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) } else { EIF_Y <- NULL } @@ -173,7 +174,7 @@ Param_spCATE <- R6Class( } ), private = list( - .type = "ATE", + .type = "CATE", .cf_likelihood_treatment = NULL, .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, diff --git a/R/Param_spOR.R b/R/Param_spOR.R new file mode 100644 index 00000000..c000cbe7 --- /dev/null +++ b/R/Param_spOR.R @@ -0,0 +1,180 @@ +#' Average Treatment Effect +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_spOR <- R6Class( + classname = "Param_spOR", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_OR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + private$.formula_OR <- formula_OR + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full") { + + + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + if(training_task$uuid == tmle_task$uuid){ + is_training_task <- TRUE + } else { + is_training_task <- FALSE + } + + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_OR, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A==1, g, 1-g) + g0 <- 1-g1 + #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + #Q0 <- Q_packed[[1]] + #Q1 <- Q_packed[[2]] + #Q <- Q_packed[[3]] + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qorig <- Q + Q0 <- bound(Q0, 0.005) + Q1 <- bound(Q1, 0.005) + OR <- Q1*(1-Q1) / (Q0*(1-Q0)) + + + h_star <- -1*as.vector((g1*OR) / (g1*OR + (1-g1))) + H <- as.matrix(V*(A + hstar)) + + # Store EIF component + if(is_training_task) { + scale <- apply(V,2, function(v){apply(self$weights*as.vector( Q1*(1-Q1) * Q0*(1-Q0) * g1 * (1-g1) / (g1 * Q1*(1-Q1) + (1-g1) *Q0*(1-Q0) )) * v*V,2,mean)}) + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) + } else { + EIF_Y <- NULL + } + + return(list(Y = H, EIF = list(Y = EIF_Y))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + weights <- tmle_task$weights + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number)$EIF$Y + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qtest <- ifelse(A==1, Q1, Q0) + if(!all(Qtest-Q==0)) { + stop("Q and Q1,Q0 dont match") + } + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] + Q0 <- bound(Q0, 0.0005) + Q1 <- bound(Q1, 0.0005) + beta <- get_beta(W, A, self$formula_OR, Q1, Q0, family = binomial(), weights = weights) + OR <- exp(V%*%beta) + + IC <- EIF + + result <- list(psi = beta, IC = IC, OR = OR) + return(result) + } + ), + active = list( + name = function() { + param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_OR = function(){ + return(private$.formula_OR) + } + ), + private = list( + .type = "OR", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE, + .formula_OR = NULL, + .submodel = list(Y = "gaussian_identity") + ) +) diff --git a/R/Param_spRR.R b/R/Param_spRR.R new file mode 100644 index 00000000..109a8da0 --- /dev/null +++ b/R/Param_spRR.R @@ -0,0 +1,187 @@ +#' Average Treatment Effect +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_spRR <- R6Class( + classname = "Param_spRR", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_RR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + private$.formula_RR <- formula_RR + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full") { + + + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + if(training_task$uuid == tmle_task$uuid){ + is_training_task <- TRUE + } else { + is_training_task <- FALSE + } + + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_RR, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A==1, g, 1-g) + g0 <- 1-g1 + #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + #Q0 <- Q_packed[[1]] + #Q1 <- Q_packed[[2]] + #Q <- Q_packed[[3]] + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qorig <- Q + Q0 <- bound(Q0, 0.005) + Q1 <- bound(Q1, 0.005) + + RR <- Q1/Q0 + gradM <- V + mstar <- RR + (1-A)*1 + num <- gradM * ( RR * g1) + denom <- RR * g1 + g0 + hstar <- - num/denom + H <- as.matrix(A*gradM + hstar) + + # Store EIF component + if(is_training_task) { + scale <- apply(V,2, function(v) { + apply(weights*V*v*g1*g0*RR/(g1*RR + g0)^2 *(Y-Q) + H*(A*v*Q),2,mean) + }) + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) + } else { + EIF_Y <- NULL + } + + return(list(Y = H, EIF = list(Y = EIF_Y))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + weights <- tmle_task$weights + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number)$EIF$Y + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qtest <- ifelse(A==1, Q1, Q0) + if(!all(Qtest-Q==0)) { + stop("Q and Q1,Q0 dont match") + } + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] + + Q0 <- bound(Q0, 0.0005) + Q1 <- bound(Q1, 0.0005) + beta <- get_beta(W, A, self$formula_OR, Q1, Q0, family = poisson(), weights = weights) + RR <- exp(V%*%beta) + + IC <- EIF + + result <- list(psi = beta, IC = IC, RR = RR) + return(result) + } + ), + active = list( + name = function() { + param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_RR = function(){ + return(private$.formula_RR) + } + ), + private = list( + .type = "RR", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE, + .formula_RR = NULL, + .submodel = list(Y = "poisson_log") + ) +) diff --git a/man/Param_ATC.Rd b/man/Param_ATC.Rd index 94212aff..618ed4f2 100644 --- a/man/Param_ATC.Rd +++ b/man/Param_ATC.Rd @@ -64,6 +64,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_ATE.Rd b/man/Param_ATE.Rd index 259e8c8d..57ce4807 100644 --- a/man/Param_ATE.Rd +++ b/man/Param_ATE.Rd @@ -54,6 +54,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_ATT.Rd b/man/Param_ATT.Rd index b129e98b..7f92f357 100644 --- a/man/Param_ATT.Rd +++ b/man/Param_ATT.Rd @@ -64,6 +64,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_MSM.Rd b/man/Param_MSM.Rd index a754fb9c..ad4716e0 100644 --- a/man/Param_MSM.Rd +++ b/man/Param_MSM.Rd @@ -62,6 +62,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_TSM.Rd b/man/Param_TSM.Rd index 50092923..6f1fdf3b 100644 --- a/man/Param_TSM.Rd +++ b/man/Param_TSM.Rd @@ -58,6 +58,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_base.Rd b/man/Param_base.Rd index 4c6b8fd2..8ec0cf42 100644 --- a/man/Param_base.Rd +++ b/man/Param_base.Rd @@ -72,6 +72,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_delta.Rd b/man/Param_delta.Rd index 50a31c93..49a4b926 100644 --- a/man/Param_delta.Rd +++ b/man/Param_delta.Rd @@ -19,6 +19,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_mean.Rd b/man/Param_mean.Rd index 570e4e90..d56c1b88 100644 --- a/man/Param_mean.Rd +++ b/man/Param_mean.Rd @@ -47,6 +47,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd index bf5dabb5..79215f89 100644 --- a/man/Param_spCATE.Rd +++ b/man/Param_spCATE.Rd @@ -54,6 +54,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, diff --git a/man/Param_spOR.Rd b/man/Param_spOR.Rd new file mode 100644 index 00000000..64d8bab5 --- /dev/null +++ b/man/Param_spOR.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_spOR.R +\name{Param_spOR} +\alias{Param_spOR} +\title{Average Treatment Effect} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spRR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_spRR.Rd b/man/Param_spRR.Rd new file mode 100644 index 00000000..f76fae1b --- /dev/null +++ b/man/Param_spRR.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_spRR.R +\name{Param_spRR} +\alias{Param_spRR} +\title{Average Treatment Effect} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_stratified.Rd b/man/Param_stratified.Rd index 8764afcb..4a09a9a3 100644 --- a/man/Param_stratified.Rd +++ b/man/Param_stratified.Rd @@ -58,6 +58,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_survival}}, \code{\link{define_param}()}, \code{\link{tmle3_Fit}} diff --git a/man/Param_survival.Rd b/man/Param_survival.Rd index 0e5c195e..66d43181 100644 --- a/man/Param_survival.Rd +++ b/man/Param_survival.Rd @@ -49,6 +49,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{define_param}()}, \code{\link{tmle3_Fit}} diff --git a/man/define_param.Rd b/man/define_param.Rd index b874ecd5..7ed31df7 100644 --- a/man/define_param.Rd +++ b/man/define_param.Rd @@ -25,6 +25,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{tmle3_Fit}} diff --git a/man/tmle3_Fit.Rd b/man/tmle3_Fit.Rd index 838a9091..45856eb1 100644 --- a/man/tmle3_Fit.Rd +++ b/man/tmle3_Fit.Rd @@ -106,6 +106,8 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, \code{\link{Param_stratified}}, \code{\link{Param_survival}}, \code{\link{define_param}()} diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 4661a0a3..178ba55e 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -35,13 +35,15 @@ tlik <- Targeted_Likelihood$new(likelihood, updater = updater) param_cate <- Param_spCATE$new(tlik, formula_CATE = ~1, intervention_list_treatment = list(define_lf(LF_static, "A", value = 1)), intervention_list_control = list(define_lf(LF_static, "A", value = 0))) -``` -```{r} updater$update(tlik, tmle_task) ``` +```{r} +param_cate$estimates(tmle_task)$psi +sd(param_cate$estimates(tmle_task)$IC) +``` From ceac71b24170901e7821091aa6d97f50206d144a Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 21:52:46 -0700 Subject: [PATCH 12/65] causalGLM seems to work --- NAMESPACE | 1 + R/Lrnr_glm_semiparametric.R | 4 +- R/Param_spCATE.R | 49 +++++++++++------- R/Param_spOR.R | 24 ++++----- R/Param_spRR.R | 36 +++++++------- R/helpers_semiparametric.R | 2 +- R/tmle3_Update.R | 24 +++++++-- R/tmle3_spec_spCausalGLM.R | 93 +++++++++++++++++++++++++++++++++++ man/Param_spCATE.Rd | 2 + man/Param_spOR.Rd | 2 + man/Param_spRR.Rd | 2 + man/tmle3_Spec_spCausalGLM.Rd | 8 +++ vignettes/testing.Rmd | 69 ++++++++++++++------------ 13 files changed, 234 insertions(+), 82 deletions(-) create mode 100644 R/tmle3_spec_spCausalGLM.R create mode 100644 man/tmle3_Spec_spCausalGLM.Rd diff --git a/NAMESPACE b/NAMESPACE index b42bf58f..1eea527a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,6 +79,7 @@ export(tmle3_Spec_OR) export(tmle3_Spec_PAR) export(tmle3_Spec_RR) export(tmle3_Spec_TSM_all) +export(tmle3_Spec_spCausalGLM) export(tmle3_Spec_stratified) export(tmle3_Spec_survival) export(tmle3_Task) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index bc17b269..08a18081 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -71,7 +71,7 @@ Lrnr_glm_semiparametric <- R6Class( task_baseline <- task$next_in_chain(covariates = covariates) lrnr_baseline <- lrnr_baseline$train(task_baseline[A==0]) Q0 <- lrnr_baseline$predict(task_baseline) - beta <- coef(glm.fit(A*V, Y, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family)) + beta <- suppressWarnings(coef(glm.fit(A*V, Y, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family))) Q1 <- family$linkinv(family$linkfun(Q0) + V%*%beta) Q <- ifelse(A==1, Q1, Q0) } else { @@ -99,7 +99,7 @@ Lrnr_glm_semiparametric <- R6Class( Q0 <- lrnr_baseline$predict(task_baseline0) # Project onto model - beta <- coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family)) + beta <- suppressWarnings(coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family))) } diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index a0bda88f..32cd74e2 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -17,6 +17,8 @@ #' \describe{ #' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood #' } +#' \item{\code{formula_CATE}}{... +#' } #' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. #' } #' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. @@ -48,6 +50,11 @@ Param_spCATE <- R6Class( public = list( initialize = function(observed_likelihood, formula_CATE =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.targeted <- rep(T,ncol(V)) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] @@ -59,18 +66,14 @@ Param_spCATE <- R6Class( private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, - clever_covariates = function(tmle_task = NULL, fold_number = "full") { + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task } - if(training_task$uuid == tmle_task$uuid){ - is_training_task <- TRUE - } else { - is_training_task <- FALSE - } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] @@ -78,8 +81,8 @@ Param_spCATE <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_CATE, as.data.frame(W)) - A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + A <- tmle_task$get_tmle_node("A", format = T )[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) g1 <- ifelse(A==1, g, 1-g) @@ -89,7 +92,9 @@ Param_spCATE <- R6Class( #Q1 <- Q_packed[[2]] #Q <- Q_packed[[3]] Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + #print(data.table(Q0,Q1,Q)) #Extract current semiparametric coef #print(data.table(Q1,Q0)) #beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) @@ -104,15 +109,20 @@ Param_spCATE <- R6Class( hstar <- - num/denom H <- as.matrix((A*gradM + hstar) /var_Y) + EIF_Y <- NULL # Store EIF component if(is_training_task) { - scale <- apply(V,2, function(v) {apply(self$weights*H *(A*v ),2,mean ) }) + scale <- apply(V,2, function(v) {apply(self$weights * H *(A*v ),2,mean ) }) + scaleinv <- solve(scale) - EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) - } else { - EIF_Y <- NULL + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) + + + # print(dim(EIF_Y)) + #print(mean(EIF_Y)) } + return(list(Y = H, EIF = list(Y = EIF_Y))) }, estimates = function(tmle_task = NULL, fold_number = "full") { @@ -123,16 +133,20 @@ Param_spCATE <- R6Class( cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] W <- tmle_task$get_tmle_node("W") - A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + A <- tmle_task$get_tmle_node("A", format = T )[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation - EIF <- self$clever_covariates(tmle_task, fold_number)$EIF$Y + clev <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE) + + EIF <- clev$EIF$Y Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) Qtest <- ifelse(A==1, Q1, Q0) if(!all(Qtest-Q==0)) { + print(quantile(abs(Qtest-Q))) stop("Q and Q1,Q0 dont match") } # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) @@ -141,9 +155,10 @@ Param_spCATE <- R6Class( # Q <- Q_packed[[3]] beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) + CATE <- Q1 - Q0 - IC <- EIF + IC <- as.matrix(EIF) result <- list(psi = beta, IC = IC, CATE = CATE) return(result) diff --git a/R/Param_spOR.R b/R/Param_spOR.R index c000cbe7..0ae0dbcf 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -17,6 +17,8 @@ #' \describe{ #' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood #' } +#' \item{\code{formula_OR}}{... +#' } #' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. #' } #' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. @@ -59,18 +61,14 @@ Param_spOR <- R6Class( private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, - clever_covariates = function(tmle_task = NULL, fold_number = "full") { + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task } - if(training_task$uuid == tmle_task$uuid){ - is_training_task <- TRUE - } else { - is_training_task <- FALSE - } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] @@ -96,16 +94,19 @@ Param_spOR <- R6Class( OR <- Q1*(1-Q1) / (Q0*(1-Q0)) - h_star <- -1*as.vector((g1*OR) / (g1*OR + (1-g1))) - H <- as.matrix(V*(A + hstar)) + h_star <- -1*as.vector((g1*OR) / (g1*OR + (1-g1))) + H <- as.matrix(V*(A + h_star)) # Store EIF component + EIF_Y <- NULL if(is_training_task) { + tryCatch({ scale <- apply(V,2, function(v){apply(self$weights*as.vector( Q1*(1-Q1) * Q0*(1-Q0) * g1 * (1-g1) / (g1 * Q1*(1-Q1) + (1-g1) *Q0*(1-Q0) )) * v*V,2,mean)}) scaleinv <- solve(scale) EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) - } else { - EIF_Y <- NULL + }, error = function(...){ + + }) } return(list(Y = H, EIF = list(Y = EIF_Y))) @@ -122,7 +123,7 @@ Param_spOR <- R6Class( Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation - EIF <- self$clever_covariates(tmle_task, fold_number)$EIF$Y + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF$Y Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) @@ -137,6 +138,7 @@ Param_spOR <- R6Class( Q0 <- bound(Q0, 0.0005) Q1 <- bound(Q1, 0.0005) beta <- get_beta(W, A, self$formula_OR, Q1, Q0, family = binomial(), weights = weights) + V <- model.matrix(self$formula_OR, as.data.frame(W)) OR <- exp(V%*%beta) IC <- EIF diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 109a8da0..0e8c1c73 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -17,6 +17,8 @@ #' \describe{ #' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood #' } +#' \item{\code{formula_RR}}{... +#' } #' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. #' } #' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. @@ -59,18 +61,14 @@ Param_spRR <- R6Class( private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, - clever_covariates = function(tmle_task = NULL, fold_number = "full") { + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task } - if(training_task$uuid == tmle_task$uuid){ - is_training_task <- TRUE - } else { - is_training_task <- FALSE - } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] @@ -92,8 +90,9 @@ Param_spRR <- R6Class( Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) Qorig <- Q - Q0 <- bound(Q0, 0.005) - Q1 <- bound(Q1, 0.005) + + Q0 <- pmax(Q0, 0.005) + Q1 <- pmax(Q1, 0.005) RR <- Q1/Q0 gradM <- V @@ -104,14 +103,15 @@ Param_spRR <- R6Class( H <- as.matrix(A*gradM + hstar) # Store EIF component + EIF_Y <- NULL if(is_training_task) { + scale <- apply(V,2, function(v) { - apply(weights*V*v*g1*g0*RR/(g1*RR + g0)^2 *(Y-Q) + H*(A*v*Q),2,mean) + apply(self$weights*V*v*g1*g0*RR/(g1*RR + g0)^2 *(Y-Q) + H*(A*v*Q),2,mean) }) scaleinv <- solve(scale) - EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) - } else { - EIF_Y <- NULL + EIF_Y <- as.matrix(self$weights * (H%*% scaleinv) * (Y-Q)) + } return(list(Y = H, EIF = list(Y = EIF_Y))) @@ -126,9 +126,10 @@ Param_spRR <- R6Class( W <- tmle_task$get_tmle_node("W") A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation - EIF <- self$clever_covariates(tmle_task, fold_number)$EIF$Y + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF$Y Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) @@ -141,12 +142,13 @@ Param_spRR <- R6Class( # Q1 <- Q_packed[[2]] # Q <- Q_packed[[3]] - Q0 <- bound(Q0, 0.0005) - Q1 <- bound(Q1, 0.0005) - beta <- get_beta(W, A, self$formula_OR, Q1, Q0, family = poisson(), weights = weights) + Q0 <- pmax(Q0, 0.0005) + Q1 <- pmax(Q1, 0.0005) + beta <- get_beta(W, A, self$formula_RR, Q1, Q0, family = poisson(), weights = weights) + V <- model.matrix(self$formula_RR, as.data.frame(W)) RR <- exp(V%*%beta) - IC <- EIF + IC <- as.matrix(EIF) result <- list(psi = beta, IC = IC, RR = RR) return(result) diff --git a/R/helpers_semiparametric.R b/R/helpers_semiparametric.R index f1ed7735..60a8abcc 100644 --- a/R/helpers_semiparametric.R +++ b/R/helpers_semiparametric.R @@ -6,7 +6,7 @@ get_beta <- function(W, A, formula, Q1, Q0, family, weights = NULL) { } V <- model.matrix(formula, as.data.frame(W)) Q <- ifelse(A==1, Q1, Q0) - beta <- coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), family = family, intercept = F, weights = weights)) + beta <- suppressWarnings(coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), family = family, intercept = F, weights = weights))) return(beta) } diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 89ff026c..801f8169 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -52,7 +52,7 @@ tmle3_Update <- R6Class( # TODO: change maxit for test initialize = function(maxit = 100, cvtmle = TRUE, one_dimensional = FALSE, constrain_step = FALSE, delta_epsilon = 1e-4, - convergence_type = c("scaled_var", "sample_size"), + convergence_type = c("scaled_var", "sample_size", "exact"), fluctuation_type = c("standard", "weighted"), optim_delta_epsilon = TRUE, use_best = FALSE, @@ -67,6 +67,7 @@ tmle3_Update <- R6Class( private$.optim_delta_epsilon <- optim_delta_epsilon private$.use_best <- use_best private$.verbose <- verbose + private$.bounds <- bounds }, collapse_covariates = function(estimates, clever_covariates) { ED <- ED_from_estimates(estimates) @@ -141,6 +142,10 @@ tmle3_Update <- R6Class( item$EIF[[update_node]] }) EIF_components <- do.call(cbind, EIF_components) + + ED <- colMeans(EIF_components) + + EDnormed <- ED / norm(ED, type = "2") if(length(EIF_components) ==0 || ncol(EIF_components) != ncol(covariates_dt)) { stop("Not all params provide EIF components") } @@ -167,6 +172,7 @@ tmle3_Update <- R6Class( # protect against qlogis(1)=Inf + initial <- bound(initial, self$bounds(update_node)) @@ -206,6 +212,7 @@ tmle3_Update <- R6Class( fit_submodel = function(submodel_data) { # Extract submodel spec info EDnormed <- submodel_data$EDnormed + if(!is.null(EDnormed)) { # Collapse clever covariates submodel_data$H <- as.matrix(submodel_data$H) %*% EDnormed @@ -251,8 +258,9 @@ tmle3_Update <- R6Class( epsilon <- self$delta_epsilon } - #risk_val <- risk(epsilon) - #risk_zero <- risk(0) + + risk_val <- risk(epsilon) + risk_zero <- risk(0) # # TODO: consider if we should do this # if(risk_zero Date: Sun, 5 Sep 2021 22:11:46 -0700 Subject: [PATCH 13/65] hi --- R/Lrnr_glm_semiparametric.R | 15 +++++++++++++-- man/Lrnr_glm_semiparametric.Rd | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index 08a18081..ac6d6c3f 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -1,6 +1,6 @@ #' Semiparametric Generalized Linear Models #' -#' This learner provides fitting procedures for generalized linear models using +#' This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and #' \code{\link[stats]{glm.fit}}. #' #' @docType class @@ -21,7 +21,18 @@ #' #' @section Parameters: #' \describe{ -#' \item{\code{...}}{Parameters passed to \code{\link[stats]{glm}}.} +#' \item{\code{formula_sp}}{ A \code{formula} object specifying the parametric component of the semiparametric model.} +#' \item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component.} +#' \item{\code{interaction_variable}}{A interaction variable to multiply with the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated as the value 1. +#' In many applications, this represents a binary treatment variable `A`.} +#' \item{\code{family}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{gaussian}), partially-linear logistic regression (\code{binomial}), partially-linear relative-risk regression (\code{poisson}) } +#' \item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on `cbind(task$X,V)` where `V` is the design matrix obtained from \code{formula_sp}. +#' Note, if `append_interaction_matrix = TRUE`, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. +#' If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. +#' Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A` = 0. +#' In the binary case, setting `append_interaction_matrix = TRUE` allows one to pool the learning across treatment arms and allows additive models to perform well. } +#' \item{\code{return_matrix_predictions}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`.} +#' #' } #' # diff --git a/man/Lrnr_glm_semiparametric.Rd b/man/Lrnr_glm_semiparametric.Rd index 7f556c41..2c9ea56c 100644 --- a/man/Lrnr_glm_semiparametric.Rd +++ b/man/Lrnr_glm_semiparametric.Rd @@ -12,13 +12,24 @@ Learner object with methods for training and prediction. See \code{\link{Lrnr_base}} for documentation on learners. } \description{ -This learner provides fitting procedures for generalized linear models using +This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and \code{\link[stats]{glm.fit}}. } \section{Parameters}{ \describe{ -\item{\code{...}}{Parameters passed to \code{\link[stats]{glm}}.} +\item{\code{formula_sp}}{ A \code{formula} object specifying the parametric component of the semiparametric model.} +\item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component.} +\item{\code{interaction_variable}}{A interaction variable to multiply with the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated as the value 1. +In many applications, this represents a binary treatment variable \code{A}.} +\item{\code{family}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{gaussian}), partially-linear logistic regression (\code{binomial}), partially-linear relative-risk regression (\code{poisson}) } +\item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on \code{cbind(task$X,V)} where \code{V} is the design matrix obtained from \code{formula_sp}. +Note, if \code{append_interaction_matrix = TRUE}, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. +If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. +Specifically, if FALSE, \code{lrnr_baseline} is used to estimate \verb{E[Y|A=0,W]} by subsetting to only observations with \code{A} = 0. +In the binary case, setting \code{append_interaction_matrix = TRUE} allows one to pool the learning across treatment arms and allows additive models to perform well. } +\item{\code{return_matrix_predictions}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being \verb{E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]}.} + } } From 7ba9492511acf084f2e1d9e79b2008e675289b06 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 22:33:54 -0700 Subject: [PATCH 14/65] Update tmle3_Update.R --- R/tmle3_Update.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 801f8169..c2f1edf1 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -288,7 +288,7 @@ tmle3_Update <- R6Class( } else if (self$fluctuation_type == "weighted") { if (self$one_dimensional) { suppressWarnings({ - submodel_fit <- glm(observed ~ -1, submodel_data, + submodel_fit <- glm(observed ~ 1, submodel_data, offset = family_object$linkfun(submodel_data$initial), family = family_object, weights = as.numeric(H) * submodel_data$weights, From 9b64c827b6642110bce2eb51f92369541005b3c0 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 22:35:17 -0700 Subject: [PATCH 15/65] Remove built in submodels/losses --- R/tmle3_Update.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index c2f1edf1..b51a0eca 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -328,12 +328,6 @@ tmle3_Update <- R6Class( epsilon <- epsilon * EDnormed return(epsilon) }, - submodel = function(epsilon, initial, H, observed) { - plogis(qlogis(initial) + H %*% epsilon) - }, - loss_function = function(estimate, observed) { - -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) - }, apply_submodel = function(submodel, submodel_data, epsilon) { submodel(epsilon, submodel_data$initial, submodel_data$H, submodel_data$observed) }, From 01b111640932034b0192a3e95f3124a1476d088c Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Sun, 5 Sep 2021 22:58:31 -0700 Subject: [PATCH 16/65] Delete submodels_semiparametric.R --- R/submodels_semiparametric.R | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 R/submodels_semiparametric.R diff --git a/R/submodels_semiparametric.R b/R/submodels_semiparametric.R deleted file mode 100644 index 6be3f486..00000000 --- a/R/submodels_semiparametric.R +++ /dev/null @@ -1,9 +0,0 @@ - - -submodel_function <- function(eps, offset, X, observed) { - offset_unpacked <- sl3::unpack_predictions(offset) - Q0 <- offset_unpacked[[1]] - Q1 <- offset_unpacked[[2]] - Q <- offset_unpacked[[3]] - -} From a22e578759137e1abf0415f6d6064c198bdf6cac Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 09:42:34 -0700 Subject: [PATCH 17/65] np params --- NAMESPACE | 2 + R/Lrnr_glm_semiparametric.R | 11 +- R/Param_npCATE.R | 195 +++++++++++++++++++++++++++++++++ R/Param_npCATT.R | 194 ++++++++++++++++++++++++++++++++ R/Param_npOR.R | 192 ++++++++++++++++++++++++++++++++ R/tmle3_Spec_npCausalGLM.R | 91 +++++++++++++++ man/Lrnr_glm_semiparametric.Rd | 5 +- man/Param_ATC.Rd | 2 + man/Param_ATE.Rd | 2 + man/Param_ATT.Rd | 2 + man/Param_MSM.Rd | 2 + man/Param_TSM.Rd | 2 + man/Param_base.Rd | 2 + man/Param_delta.Rd | 2 + man/Param_mean.Rd | 2 + man/Param_npCATE.Rd | 69 ++++++++++++ man/Param_npOR.Rd | 69 ++++++++++++ man/Param_spCATE.Rd | 2 + man/Param_spOR.Rd | 2 + man/Param_spRR.Rd | 2 + man/Param_stratified.Rd | 2 + man/Param_survival.Rd | 2 + man/define_param.Rd | 2 + man/tmle3_Fit.Rd | 2 + 24 files changed, 851 insertions(+), 7 deletions(-) create mode 100644 R/Param_npCATE.R create mode 100644 R/Param_npCATT.R create mode 100644 R/Param_npOR.R create mode 100644 R/tmle3_Spec_npCausalGLM.R create mode 100644 man/Param_npCATE.Rd create mode 100644 man/Param_npOR.Rd diff --git a/NAMESPACE b/NAMESPACE index 1eea527a..3920d261 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,8 @@ export(Param_TSM) export(Param_base) export(Param_delta) export(Param_mean) +export(Param_npCATE) +export(Param_npOR) export(Param_spCATE) export(Param_spOR) export(Param_spRR) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index ac6d6c3f..2eea612f 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -1,7 +1,8 @@ #' Semiparametric Generalized Linear Models #' #' This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and -#' \code{\link[stats]{glm.fit}}. +#' \code{\link[stats]{glm.fit}}. It supports models of the form `linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)` where `A` is a binary or continuous interaction variable, +#' and `f(W)` is a user-specified parametric function (e.g. `f(W) = model.matrix(formula_sp, W)`). #' #' @docType class #' @@ -26,7 +27,7 @@ #' \item{\code{interaction_variable}}{A interaction variable to multiply with the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated as the value 1. #' In many applications, this represents a binary treatment variable `A`.} #' \item{\code{family}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{gaussian}), partially-linear logistic regression (\code{binomial}), partially-linear relative-risk regression (\code{poisson}) } -#' \item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on `cbind(task$X,V)` where `V` is the design matrix obtained from \code{formula_sp}. +#' \item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on `cbind(task$X,A*V)` where `A` is the interaction variable and `V` is the design matrix obtained from \code{formula_sp}. #' Note, if `append_interaction_matrix = TRUE`, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. #' If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. #' Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A` = 0. @@ -47,7 +48,7 @@ Lrnr_glm_semiparametric <- R6Class( ), private = list( - .properties = c("continuous", "binomial", "weights", "offset"), + .properties = c("continuous", "binomial", "semiparametric", "weights"), .train = function(task) { @@ -114,8 +115,8 @@ Lrnr_glm_semiparametric <- R6Class( } - fit_object = list(beta = beta, lrnr_baseline = lrnr_baseline, covariates = covariates, family = family, formula = formula, - append_interaction_matrix = append_interaction_matrix, binary = binary, task_baseline = task_baseline) + fit_object = list(beta = beta, lrnr_baseline = lrnr_baseline, covariates = covariates, family = family, formula = formula, + append_interaction_matrix = append_interaction_matrix, binary = binary, task_baseline = task_baseline) return(fit_object) }, .predict = function(task) { diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R new file mode 100644 index 00000000..8389dfd4 --- /dev/null +++ b/R/Param_npCATE.R @@ -0,0 +1,195 @@ +#' Average Treatment Effect +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{formula_CATE}}{... +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_npCATE <- R6Class( + classname = "Param_npCATE", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_CATE =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.targeted <- rep(T,ncol(V)) + + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + private$.formula_CATE <- formula_CATE + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { + + + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + + + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_CATE, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = T )[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + W_train <- training_task$get_tmle_node("W") + V_train <- model.matrix(self$formula_OR, as.data.frame(W_train)) + A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] + Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] + + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A==1, g, 1-g) + g0 <- 1-g1 + + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + beta <- coef(glm.fit(V_train, Q1-Q0, family = gaussian(), weights = self$weights)) + CATE <- V %*% beta + # var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) + # var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) + # var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) + + + H <- V*(A/g1 - (1-A)*(1/g0)) + + EIF_Y <- NULL + # Store EIF component + if(is_training_task) { + scale <- apply(V,2, function(v) {apply(self$weights *(v ),2,mean ) }) + + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) + EIF_WA <- apply(V, 2, function(v) { + self$weights*(v*(Q1 - Q0 - CATE) - mean(self$weights*(Q1 - Q0 - CATE))) + }) %*% scaleinv + + # print(dim(EIF_Y)) + #print(mean(EIF_Y)) + } + + + return(list(Y = H, EIF = list(Y = EIF_Y, WA = EIF_WA))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = T )[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + + weights <- tmle_task$weights + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIF <- EIF$Y + EIF$WA + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qtest <- ifelse(A==1, Q1, Q0) + if(!all(Qtest-Q==0)) { + print(quantile(abs(Qtest-Q))) + stop("Q and Q1,Q0 dont match") + } + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] + + beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) + + CATE <- Q1 - Q0 + + IC <- as.matrix(EIF) + + result <- list(psi = beta, IC = IC, CATE = CATE) + return(result) + } + ), + active = list( + name = function() { + param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_CATE = function(){ + return(private$.formula_CATE) + } + ), + private = list( + .type = "CATE", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE, + .formula_CATE = NULL, + .submodel = list(Y = "gaussian_identity") + ) +) diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R new file mode 100644 index 00000000..a91c8873 --- /dev/null +++ b/R/Param_npCATT.R @@ -0,0 +1,194 @@ +#' Average Treatment Effect +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{formula_CATT}}{... +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_npCATT <- R6Class( + classname = "Param_npCATT", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_CATT =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_CATT, as.data.frame(W)) + private$.targeted <- rep(T,ncol(V)) + + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + private$.formula_CATT <- formula_CATT + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { + + + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + + + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_CATT, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = T )[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + W_train <- training_task$get_tmle_node("W") + V_train <- model.matrix(self$formula_OR, as.data.frame(W_train)) + A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] + Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] + + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A==1, g, 1-g) + g0 <- 1-g1 + + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + beta <- get_beta(W_train, A_train, self$formula_CATT, Q1, Q0, family = gaussian(), weights = self$weights) + + # var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) + # var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) + # var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) + + H <- V*(A - (1-A)*(g1/g0)) + + EIF_Y <- NULL + # Store EIF component + if(is_training_task) { + scale <- apply(V,2, function(v) {apply(self$weights *(A*v ),2,mean ) }) + + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) + EIF_WA <- apply(V, 2, function(v) { + self$weights*(A*v*(Q1 - V%*%beta - Q0)) - mean(self$weights*(A*v*(Q1 - V%*%beta - Q0))) + }) %*% scaleinv + + # print(dim(EIF_Y)) + #print(mean(EIF_Y)) + } + + + return(list(Y = H, EIF = list(Y = EIF_Y, WA = EIF_WA))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = T )[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + + weights <- tmle_task$weights + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIF <- EIF$Y + EIF$WA + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qtest <- ifelse(A==1, Q1, Q0) + if(!all(Qtest-Q==0)) { + print(quantile(abs(Qtest-Q))) + stop("Q and Q1,Q0 dont match") + } + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] + + beta <- get_beta(W, A, self$formula_CATT, Q1, Q0, family = gaussian(), weights = weights) + + CATE <- Q1 - Q0 + + IC <- as.matrix(EIF) + + result <- list(psi = beta, IC = IC, CATE = CATE) + return(result) + } + ), + active = list( + name = function() { + param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_CATT = function(){ + return(private$.formula_CATT) + } + ), + private = list( + .type = "CATE", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE, + .formula_CATT = NULL, + .submodel = list(Y = "gaussian_identity") + ) +) diff --git a/R/Param_npOR.R b/R/Param_npOR.R new file mode 100644 index 00000000..90e1c25f --- /dev/null +++ b/R/Param_npOR.R @@ -0,0 +1,192 @@ +#' Average Treatment Effect +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{formula_OR}}{... +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_npOR <- R6Class( + classname = "Param_npOR", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_OR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + private$.formula_OR <- formula_OR + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { + + + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + + + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_OR, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + W_train <- training_task$get_tmle_node("W") + V_train <- model.matrix(self$formula_OR, as.data.frame(W_train)) + A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] + Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] + + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A==1, g, 1-g) + g0 <- 1-g1 + + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qorig <- Q + Q0 <- bound(Q0, 0.005) + Q1 <- bound(Q1, 0.005) + beta <- get_beta(W_train, A_train, self$formula_OR, Q1, Q0, family = binomial(), weights = self$weights) + Q1beta <- plogis(qlogis(Q0) + V%*%beta) + ORbeta <- Q1beta*(1-Q1beta) / (Q0*(1-Q0)) + omega <- (g0 + g1*ORbeta) / (g0) + + + h_star <- -1*as.vector((g1*ORbeta) / (g1*ORbeta + (1-g1))) + H <- as.matrix(omega*V*(A + h_star)) + + # Store EIF component + EIF_Y <- NULL + EIFWA <- NULL + if(is_training_task) { + tryCatch({ + scale <- apply(V,2, function(v){apply(self$weights*as.vector( Q1beta*(1-Q1beta) * Q0*(1-Q0) * g1 * (1-g1) / (g1 * Q1beta*(1-Q1beta) + (1-g1) *Q0*(1-Q0) )) * v*V,2,mean)}) + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) + EIFWA <- apply(V, 2, function(v) { + (weights*(A*v*(Q1 - Q1beta)) - mean( self$weights*(A*v*(Q1 - Q1beta)))) + }) %*% scale_inv + }, error = function(...){ + + }) + } + + return(list(Y = H, EIF = list(Y = EIF_Y, WA = EIFWA))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] + weights <- tmle_task$weights + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIF <- EIF$Y + EIF$WA + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Qtest <- ifelse(A==1, Q1, Q0) + if(!all(Qtest-Q==0)) { + stop("Q and Q1,Q0 dont match") + } + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] + Q0 <- bound(Q0, 0.0005) + Q1 <- bound(Q1, 0.0005) + beta <- get_beta(W, A, self$formula_OR, Q1, Q0, family = binomial(), weights = weights) + V <- model.matrix(self$formula_OR, as.data.frame(W)) + OR <- exp(V%*%beta) + + IC <- EIF + + result <- list(psi = beta, IC = IC, OR = OR) + return(result) + } + ), + active = list( + name = function() { + param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_OR = function(){ + return(private$.formula_OR) + } + ), + private = list( + .type = "OR", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE, + .formula_OR = NULL, + .submodel = list(Y = "gaussian_identity") + ) +) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R new file mode 100644 index 00000000..66fa68db --- /dev/null +++ b/R/tmle3_Spec_npCausalGLM.R @@ -0,0 +1,91 @@ +#' Defines a TML Estimator (except for the data) +#' +#' Current limitations: pretty much tailored to \code{Param_TSM} +#' +#' @importFrom R6 R6Class +#' +#' @export +# +tmle3_Spec_npCausalGLM <- R6Class( + classname = "tmle3_Spec_npCausalGLM", + portable = TRUE, + class = TRUE, + public = list( + initialize = function(formula, estimand = c("CATE", "OR", "RR"), treatment_level = 1, control_level =0, + likelihood_override = NULL, + variable_types = NULL, ...) { + estimand <- match.arg(estimand) + private$.options <- list(estimand = estimand, formula = formula, + treatment_level = treatment_level, control_level = control_level, + likelihood_override = likelihood_override, + variable_types = variable_types, ... + ) + }, + make_tmle_task = function(data, node_list, ...) { + variable_types <- self$options$variable_types + include_variance_node <- self$options$estimand == "CATE" + + tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) + + return(tmle_task) + }, + make_initial_likelihood = function(tmle_task, learner_list = NULL ) { + #Wrap baseline learner in semiparametric learner + + # produce trained likelihood when likelihood_def provided + if (!is.null(self$options$likelihood_override)) { + likelihood <- self$options$likelihood_override$train(tmle_task) + } else { + likelihood <- point_tx_likelihood(tmle_task, learner_list) + } + + return(likelihood) + }, + make_updater = function(convergence_type = "sample_size", verbose = TRUE,...) { + if(self$options$estimand == "CATE"){ + updater <- tmle3_Update$new(maxit=100,one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) + } else if (self$options$estimand == "OR"){ + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.001, constrain_step = TRUE, bounds = 0.0025, ...) + } else if (self$options$estimand == "RR"){ + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.001, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + } + return(updater) + }, + make_targeted_likelihood = function(likelihood, updater) { + targeted_likelihood <- Targeted_Likelihood$new(likelihood, updater) + return(targeted_likelihood) + }, + make_params = function(tmle_task, targeted_likelihood) { + treatment_value <- self$options$treatment_level + control_value <- self$options$control_level + A_levels <- tmle_task$npsem[["A"]]$variable_type$levels + if (!is.null(A_levels)) { + treatment_value <- factor(treatment_value, levels = A_levels) + control_value <- factor(control_value, levels = A_levels) + } + treatment <- define_lf(LF_static, "A", value = treatment_value) + control <- define_lf(LF_static, "A", value = control_value) + formula <- self$options$formula + if(self$options$estimand == "CATE"){ + param <- Param_spCATE$new(targeted_likelihood,formula, treatment, control) + } else if (self$options$estimand == "OR"){ + param <- Param_spOR$new(targeted_likelihood,formula, treatment, control) + } else if (self$options$estimand == "RR"){ + param <- Param_spRR$new(targeted_likelihood, formula, treatment, control) + } + return(list(param)) + } + ), + active = list( + options = function() { + return(private$.options) + }, + family = function() { + return(private$.families[[self$options$estimand]]) + } + ), + private = list( + .options = NULL, + .families = list("CATE" = gaussian(), "RR" = poisson(), "OR" = binomial()) + ) +) diff --git a/man/Lrnr_glm_semiparametric.Rd b/man/Lrnr_glm_semiparametric.Rd index 2c9ea56c..2ad52413 100644 --- a/man/Lrnr_glm_semiparametric.Rd +++ b/man/Lrnr_glm_semiparametric.Rd @@ -13,7 +13,8 @@ Learner object with methods for training and prediction. See } \description{ This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and -\code{\link[stats]{glm.fit}}. +\code{\link[stats]{glm.fit}}. It supports models of the form \verb{linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)} where \code{A} is a binary or continuous interaction variable, +and \code{f(W)} is a user-specified parametric function (e.g. \code{f(W) = model.matrix(formula_sp, W)}). } \section{Parameters}{ @@ -23,7 +24,7 @@ This learner provides fitting procedures for semiparametric generalized linear m \item{\code{interaction_variable}}{A interaction variable to multiply with the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated as the value 1. In many applications, this represents a binary treatment variable \code{A}.} \item{\code{family}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{gaussian}), partially-linear logistic regression (\code{binomial}), partially-linear relative-risk regression (\code{poisson}) } -\item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on \code{cbind(task$X,V)} where \code{V} is the design matrix obtained from \code{formula_sp}. +\item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on \code{cbind(task$X,A*V)} where \code{A} is the interaction variable and \code{V} is the design matrix obtained from \code{formula_sp}. Note, if \code{append_interaction_matrix = TRUE}, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. Specifically, if FALSE, \code{lrnr_baseline} is used to estimate \verb{E[Y|A=0,W]} by subsetting to only observations with \code{A} = 0. diff --git a/man/Param_ATC.Rd b/man/Param_ATC.Rd index 618ed4f2..1afd5b8e 100644 --- a/man/Param_ATC.Rd +++ b/man/Param_ATC.Rd @@ -63,6 +63,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_ATE.Rd b/man/Param_ATE.Rd index 57ce4807..d8de0787 100644 --- a/man/Param_ATE.Rd +++ b/man/Param_ATE.Rd @@ -53,6 +53,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_ATT.Rd b/man/Param_ATT.Rd index 7f92f357..36b076b1 100644 --- a/man/Param_ATT.Rd +++ b/man/Param_ATT.Rd @@ -63,6 +63,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_MSM.Rd b/man/Param_MSM.Rd index ad4716e0..0dc472ee 100644 --- a/man/Param_MSM.Rd +++ b/man/Param_MSM.Rd @@ -61,6 +61,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_TSM.Rd b/man/Param_TSM.Rd index 6f1fdf3b..d39064c5 100644 --- a/man/Param_TSM.Rd +++ b/man/Param_TSM.Rd @@ -57,6 +57,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_base.Rd b/man/Param_base.Rd index 8ec0cf42..bff0ab6b 100644 --- a/man/Param_base.Rd +++ b/man/Param_base.Rd @@ -71,6 +71,8 @@ Other Parameters: \code{\link{Param_TSM}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_delta.Rd b/man/Param_delta.Rd index 49a4b926..662dac30 100644 --- a/man/Param_delta.Rd +++ b/man/Param_delta.Rd @@ -18,6 +18,8 @@ Other Parameters: \code{\link{Param_TSM}}, \code{\link{Param_base}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_mean.Rd b/man/Param_mean.Rd index d56c1b88..eae83dbb 100644 --- a/man/Param_mean.Rd +++ b/man/Param_mean.Rd @@ -46,6 +46,8 @@ Other Parameters: \code{\link{Param_TSM}}, \code{\link{Param_base}}, \code{\link{Param_delta}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_npCATE.Rd b/man/Param_npCATE.Rd new file mode 100644 index 00000000..77c4b137 --- /dev/null +++ b/man/Param_npCATE.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_npCATE.R +\name{Param_npCATE} +\alias{Param_npCATE} +\title{Average Treatment Effect} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{formula_CATE}}{... +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_npOR}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_npOR.Rd b/man/Param_npOR.Rd new file mode 100644 index 00000000..b2f47bab --- /dev/null +++ b/man/Param_npOR.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_npOR.R +\name{Param_npOR} +\alias{Param_npOR} +\title{Average Treatment Effect} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{formula_OR}}{... +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd index d767fb31..e3525778 100644 --- a/man/Param_spCATE.Rd +++ b/man/Param_spCATE.Rd @@ -56,6 +56,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, \code{\link{Param_stratified}}, diff --git a/man/Param_spOR.Rd b/man/Param_spOR.Rd index 8296f894..2f3d4c44 100644 --- a/man/Param_spOR.Rd +++ b/man/Param_spOR.Rd @@ -56,6 +56,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spRR}}, \code{\link{Param_stratified}}, diff --git a/man/Param_spRR.Rd b/man/Param_spRR.Rd index 0733500d..164abe7a 100644 --- a/man/Param_spRR.Rd +++ b/man/Param_spRR.Rd @@ -56,6 +56,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_stratified}}, diff --git a/man/Param_stratified.Rd b/man/Param_stratified.Rd index 4a09a9a3..9c4e94ab 100644 --- a/man/Param_stratified.Rd +++ b/man/Param_stratified.Rd @@ -57,6 +57,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_survival.Rd b/man/Param_survival.Rd index 66d43181..ee8866cb 100644 --- a/man/Param_survival.Rd +++ b/man/Param_survival.Rd @@ -48,6 +48,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/define_param.Rd b/man/define_param.Rd index 7ed31df7..57191fc3 100644 --- a/man/define_param.Rd +++ b/man/define_param.Rd @@ -24,6 +24,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/tmle3_Fit.Rd b/man/tmle3_Fit.Rd index 45856eb1..ce0b331d 100644 --- a/man/tmle3_Fit.Rd +++ b/man/tmle3_Fit.Rd @@ -105,6 +105,8 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, From 1142b6b756e91d28749916d0990077a2f6841ce9 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 10:12:52 -0700 Subject: [PATCH 18/65] more np --- NAMESPACE | 2 + R/Param_npCATE.R | 4 +- R/Param_npCATT.R | 7 +++- R/Param_spCATE.R | 3 +- R/tmle3_Spec_npCausalGLM.R | 14 ++++--- man/Param_ATC.Rd | 1 + man/Param_ATE.Rd | 1 + man/Param_ATT.Rd | 1 + man/Param_MSM.Rd | 1 + man/Param_TSM.Rd | 1 + man/Param_base.Rd | 1 + man/Param_delta.Rd | 1 + man/Param_mean.Rd | 1 + man/Param_npCATE.Rd | 1 + man/Param_npCATT.Rd | 70 +++++++++++++++++++++++++++++++++++ man/Param_npOR.Rd | 1 + man/Param_spCATE.Rd | 1 + man/Param_spOR.Rd | 1 + man/Param_spRR.Rd | 1 + man/Param_stratified.Rd | 1 + man/Param_survival.Rd | 1 + man/define_param.Rd | 1 + man/tmle3_Fit.Rd | 1 + man/tmle3_Spec_npCausalGLM.Rd | 8 ++++ vignettes/testing.Rmd | 19 ++++++++-- 25 files changed, 129 insertions(+), 15 deletions(-) create mode 100644 man/Param_npCATT.Rd create mode 100644 man/tmle3_Spec_npCausalGLM.Rd diff --git a/NAMESPACE b/NAMESPACE index 3920d261..82e51ef0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ export(Param_base) export(Param_delta) export(Param_mean) export(Param_npCATE) +export(Param_npCATT) export(Param_npOR) export(Param_spCATE) export(Param_spOR) @@ -81,6 +82,7 @@ export(tmle3_Spec_OR) export(tmle3_Spec_PAR) export(tmle3_Spec_RR) export(tmle3_Spec_TSM_all) +export(tmle3_Spec_npCausalGLM) export(tmle3_Spec_spCausalGLM) export(tmle3_Spec_stratified) export(tmle3_Spec_survival) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 8389dfd4..68c60f7b 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -84,7 +84,7 @@ Param_npCATE <- R6Class( A <- tmle_task$get_tmle_node("A", format = T )[[1]] Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] W_train <- training_task$get_tmle_node("W") - V_train <- model.matrix(self$formula_OR, as.data.frame(W_train)) + V_train <- model.matrix(self$formula_CATE, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] @@ -107,7 +107,7 @@ Param_npCATE <- R6Class( EIF_Y <- NULL # Store EIF component if(is_training_task) { - scale <- apply(V,2, function(v) {apply(self$weights *(v ),2,mean ) }) + scale <- apply(V,2, function(v) {apply(self$weights * V*(v ),2,mean ) }) scaleinv <- solve(scale) EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index a91c8873..ee963d49 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -84,7 +84,7 @@ Param_npCATT <- R6Class( A <- tmle_task$get_tmle_node("A", format = T )[[1]] Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] W_train <- training_task$get_tmle_node("W") - V_train <- model.matrix(self$formula_OR, as.data.frame(W_train)) + V_train <- model.matrix(self$formula_CATT, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] @@ -104,15 +104,18 @@ Param_npCATT <- R6Class( H <- V*(A - (1-A)*(g1/g0)) EIF_Y <- NULL + EIF_WA <- NULL # Store EIF component if(is_training_task) { - scale <- apply(V,2, function(v) {apply(self$weights *(A*v ),2,mean ) }) + tryCatch({ + scale <- apply(V,2, function(v) {apply(self$weights *(A*v*V ),2,mean ) }) scaleinv <- solve(scale) EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) EIF_WA <- apply(V, 2, function(v) { self$weights*(A*v*(Q1 - V%*%beta - Q0)) - mean(self$weights*(A*v*(Q1 - V%*%beta - Q0))) }) %*% scaleinv + }, error = function(...){}) # print(dim(EIF_Y)) #print(mean(EIF_Y)) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index 32cd74e2..d07f32c7 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -112,11 +112,12 @@ Param_spCATE <- R6Class( EIF_Y <- NULL # Store EIF component if(is_training_task) { + tryCatch({ scale <- apply(V,2, function(v) {apply(self$weights * H *(A*v ),2,mean ) }) scaleinv <- solve(scale) EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) - + },error = function(...){}) # print(dim(EIF_Y)) #print(mean(EIF_Y)) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 66fa68db..1d1b02f1 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -11,7 +11,7 @@ tmle3_Spec_npCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "OR", "RR"), treatment_level = 1, control_level =0, + initialize = function(formula, estimand = c("CATE", "CATT", "OR", "RR"), treatment_level = 1, control_level =0, likelihood_override = NULL, variable_types = NULL, ...) { estimand <- match.arg(estimand) @@ -23,7 +23,7 @@ tmle3_Spec_npCausalGLM <- R6Class( }, make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types - include_variance_node <- self$options$estimand == "CATE" + include_variance_node <- FALSE tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) @@ -42,7 +42,7 @@ tmle3_Spec_npCausalGLM <- R6Class( return(likelihood) }, make_updater = function(convergence_type = "sample_size", verbose = TRUE,...) { - if(self$options$estimand == "CATE"){ + if(self$options$estimand == "CATE" || self$options$estimand == "CATT"){ updater <- tmle3_Update$new(maxit=100,one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR"){ updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.001, constrain_step = TRUE, bounds = 0.0025, ...) @@ -67,11 +67,13 @@ tmle3_Spec_npCausalGLM <- R6Class( control <- define_lf(LF_static, "A", value = control_value) formula <- self$options$formula if(self$options$estimand == "CATE"){ - param <- Param_spCATE$new(targeted_likelihood,formula, treatment, control) + param <- Param_npCATE$new(targeted_likelihood,formula, treatment, control) + } else if(self$options$estimand == "CATT"){ + param <- Param_npCATT$new(targeted_likelihood,formula, treatment, control) } else if (self$options$estimand == "OR"){ - param <- Param_spOR$new(targeted_likelihood,formula, treatment, control) + param <- Param_npOR$new(targeted_likelihood,formula, treatment, control) } else if (self$options$estimand == "RR"){ - param <- Param_spRR$new(targeted_likelihood, formula, treatment, control) + param <- Param_npRR$new(targeted_likelihood, formula, treatment, control) } return(list(param)) } diff --git a/man/Param_ATC.Rd b/man/Param_ATC.Rd index 1afd5b8e..20818d61 100644 --- a/man/Param_ATC.Rd +++ b/man/Param_ATC.Rd @@ -64,6 +64,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_ATE.Rd b/man/Param_ATE.Rd index d8de0787..d4451ac2 100644 --- a/man/Param_ATE.Rd +++ b/man/Param_ATE.Rd @@ -54,6 +54,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_ATT.Rd b/man/Param_ATT.Rd index 36b076b1..ef9c26a9 100644 --- a/man/Param_ATT.Rd +++ b/man/Param_ATT.Rd @@ -64,6 +64,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_MSM.Rd b/man/Param_MSM.Rd index 0dc472ee..a6f915b1 100644 --- a/man/Param_MSM.Rd +++ b/man/Param_MSM.Rd @@ -62,6 +62,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_TSM.Rd b/man/Param_TSM.Rd index d39064c5..f82946bb 100644 --- a/man/Param_TSM.Rd +++ b/man/Param_TSM.Rd @@ -58,6 +58,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_base.Rd b/man/Param_base.Rd index bff0ab6b..9a03c7b0 100644 --- a/man/Param_base.Rd +++ b/man/Param_base.Rd @@ -72,6 +72,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_delta.Rd b/man/Param_delta.Rd index 662dac30..a1c991d6 100644 --- a/man/Param_delta.Rd +++ b/man/Param_delta.Rd @@ -19,6 +19,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_mean.Rd b/man/Param_mean.Rd index eae83dbb..14906ca2 100644 --- a/man/Param_mean.Rd +++ b/man/Param_mean.Rd @@ -47,6 +47,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_npCATE.Rd b/man/Param_npCATE.Rd index 77c4b137..66e07e50 100644 --- a/man/Param_npCATE.Rd +++ b/man/Param_npCATE.Rd @@ -56,6 +56,7 @@ Other Parameters: \code{\link{Param_base}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_npCATT.Rd b/man/Param_npCATT.Rd new file mode 100644 index 00000000..026bf511 --- /dev/null +++ b/man/Param_npCATT.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_npCATT.R +\name{Param_npCATT} +\alias{Param_npCATT} +\title{Average Treatment Effect} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{formula_CATT}}{... +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npOR}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_npOR.Rd b/man/Param_npOR.Rd index b2f47bab..b3677b99 100644 --- a/man/Param_npOR.Rd +++ b/man/Param_npOR.Rd @@ -57,6 +57,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd index e3525778..53574037 100644 --- a/man/Param_spCATE.Rd +++ b/man/Param_spCATE.Rd @@ -57,6 +57,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_spOR.Rd b/man/Param_spOR.Rd index 2f3d4c44..40311507 100644 --- a/man/Param_spOR.Rd +++ b/man/Param_spOR.Rd @@ -57,6 +57,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spRR}}, diff --git a/man/Param_spRR.Rd b/man/Param_spRR.Rd index 164abe7a..b666d1e5 100644 --- a/man/Param_spRR.Rd +++ b/man/Param_spRR.Rd @@ -57,6 +57,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_stratified.Rd b/man/Param_stratified.Rd index 9c4e94ab..98f559f0 100644 --- a/man/Param_stratified.Rd +++ b/man/Param_stratified.Rd @@ -58,6 +58,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_survival.Rd b/man/Param_survival.Rd index ee8866cb..3a861b9d 100644 --- a/man/Param_survival.Rd +++ b/man/Param_survival.Rd @@ -49,6 +49,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/define_param.Rd b/man/define_param.Rd index 57191fc3..ae912f7a 100644 --- a/man/define_param.Rd +++ b/man/define_param.Rd @@ -25,6 +25,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/tmle3_Fit.Rd b/man/tmle3_Fit.Rd index ce0b331d..73fc0dd2 100644 --- a/man/tmle3_Fit.Rd +++ b/man/tmle3_Fit.Rd @@ -106,6 +106,7 @@ Other Parameters: \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/tmle3_Spec_npCausalGLM.Rd b/man/tmle3_Spec_npCausalGLM.Rd new file mode 100644 index 00000000..1cc68b05 --- /dev/null +++ b/man/tmle3_Spec_npCausalGLM.Rd @@ -0,0 +1,8 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tmle3_Spec_npCausalGLM.R +\name{tmle3_Spec_npCausalGLM} +\alias{tmle3_Spec_npCausalGLM} +\title{Defines a TML Estimator (except for the data)} +\description{ +Current limitations: pretty much tailored to \code{Param_TSM} +} diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 5fead7fa..f340b36e 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -15,16 +15,26 @@ library(sl3) n <- 200 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) -Y <- rnorm(n, mean = A+W, sd = 0.1) +Y <- rnorm(n, mean = A+W, sd = 0.3) data <- data.table(W,A,Y) lrnr_Y0W <- Lrnr_glmnet$new() lrnr_A <- Lrnr_glm$new() -lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1 , lrnr_Y0W, interaction_variable = "A", family = gaussian(), return_matrix_predictions = FALSE) + node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) -spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") +# spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") +# out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATE") +out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) +out + +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATT") out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) +out +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") +out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) +out ``` @@ -39,12 +49,13 @@ Y <- rbinom(n, size = 1, prob = plogis(A + W)) data <- data.table(W,A,Y) lrnr_Y0W <- Lrnr_glmnet$new() lrnr_A <- Lrnr_glm$new() -lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1 , lrnr_Y0W, interaction_variable = "A", family = binomial(), return_matrix_predictions = FALSE) node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "OR") out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) out + + ``` From a4584d1a557dddd43dcb7010652037124abf7f46 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 11:12:20 -0700 Subject: [PATCH 19/65] minr bug fixes to npOR --- R/Param_npCATE.R | 6 +++- R/Param_npCATT.R | 7 +++-- R/Param_npOR.R | 49 ++++++++++++++++++-------------- R/Param_spCATE.R | 7 +++-- R/Param_spOR.R | 29 +++++++++---------- R/Param_spRR.R | 26 +++++++++-------- R/tmle3_Spec_npCausalGLM.R | 6 ++-- R/tmle3_spec_spCausalGLM.R | 6 ++-- man/Param_npCATE.Rd | 6 +++- man/Param_npCATT.Rd | 10 +++++-- man/Param_npOR.Rd | 14 ++++++++-- man/Param_spCATE.Rd | 10 +++++-- man/Param_spOR.Rd | 12 ++++++-- man/Param_spRR.Rd | 12 ++++++-- vignettes/testing.Rmd | 57 +++++++++++++++++++++++++++++--------- 15 files changed, 171 insertions(+), 86 deletions(-) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 68c60f7b..64dcfcb7 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -1,4 +1,8 @@ -#' Average Treatment Effect +#' Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +#' The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +#' Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +#' This can be used to assess heterogeneity of the average treatment effect. +#' We note that `formula_CATE = ~ 1` gives an estimator of the nonparametric average treatment effect (ATE). #' #' Parameter definition for the Average Treatment Effect (ATE). #' @importFrom R6 R6Class diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index ee963d49..a09fde68 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -1,6 +1,7 @@ -#' Average Treatment Effect -#' -#' Parameter definition for the Average Treatment Effect (ATE). +#' Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +#' The true conditional average treatment effect is projected onto a parametric working model using only individuals with `A=1` (among the treated). +#' This can be used to assess heterogeneity of the average treatment effect and avoids positivity issues by focusing on best approximating the conditional average treatment effect ampng the treated. +#' We note that `formula_CATT = ~ 1` gives an estimator of the nonparametric average treatment effect among the treated (ATT). #' @importFrom R6 R6Class #' @importFrom uuid UUIDgenerate #' @importFrom methods is diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 90e1c25f..793f2ff7 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -1,5 +1,8 @@ -#' Average Treatment Effect -#' +#' Nonparametric inference for user-specified parametric working models for the conditional odds ratio between two binary variables +#' The true conditional odds ratio is projected onto a parametric working model using logistic-regression. +#' This can be used to assess heterogeneity of the odds ratio. +#' We note that `formula_logOR = ~ 1` gives an estimator of the nonparametric marginal odds ratio among the treated. +#' The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the `log`-conditional odds ratio. #' Parameter definition for the Average Treatment Effect (ATE). #' @importFrom R6 R6Class #' @importFrom uuid UUIDgenerate @@ -17,7 +20,7 @@ #' \describe{ #' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood #' } -#' \item{\code{formula_OR}}{... +#' \item{\code{formula_logOR}}{... #' } #' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. #' } @@ -48,7 +51,7 @@ Param_npOR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_OR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_logOR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -57,7 +60,7 @@ Param_npOR <- R6Class( intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) intervention_list_control <- c(intervention_list_control, censoring_intervention) } - private$.formula_OR <- formula_OR + private$.formula_logOR <- formula_logOR private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, @@ -75,11 +78,11 @@ Param_npOR <- R6Class( intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) W <- tmle_task$get_tmle_node("W") - V <- model.matrix(self$formula_OR, as.data.frame(W)) + V <- model.matrix(self$formula_logOR, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] W_train <- training_task$get_tmle_node("W") - V_train <- model.matrix(self$formula_OR, as.data.frame(W_train)) + V_train <- model.matrix(self$formula_logOR, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] @@ -93,13 +96,16 @@ Param_npOR <- R6Class( Qorig <- Q Q0 <- bound(Q0, 0.005) Q1 <- bound(Q1, 0.005) - beta <- get_beta(W_train, A_train, self$formula_OR, Q1, Q0, family = binomial(), weights = self$weights) + beta <- get_beta(W_train, A_train, self$formula_logOR, Q1, Q0, family = binomial(), weights = self$weights) + Q1beta <- plogis(qlogis(Q0) + V%*%beta) - ORbeta <- Q1beta*(1-Q1beta) / (Q0*(1-Q0)) - omega <- (g0 + g1*ORbeta) / (g0) + + sigma_rel <- Q1beta*(1-Q1beta) / (Q0*(1-Q0)) - h_star <- -1*as.vector((g1*ORbeta) / (g1*ORbeta + (1-g1))) + omega <- (g0 + g1*sigma_rel) / (g0) + + h_star <- -1*as.vector((g1*sigma_rel) / (g1*sigma_rel + (1-g1))) H <- as.matrix(omega*V*(A + h_star)) # Store EIF component @@ -107,15 +113,16 @@ Param_npOR <- R6Class( EIFWA <- NULL if(is_training_task) { tryCatch({ - scale <- apply(V,2, function(v){apply(self$weights*as.vector( Q1beta*(1-Q1beta) * Q0*(1-Q0) * g1 * (1-g1) / (g1 * Q1beta*(1-Q1beta) + (1-g1) *Q0*(1-Q0) )) * v*V,2,mean)}) + scale <- apply(V,2, function(v){apply(self$weights*(A * Q1beta*(1-Q1beta) * v*V),2,mean)}) scaleinv <- solve(scale) EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) EIFWA <- apply(V, 2, function(v) { - (weights*(A*v*(Q1 - Q1beta)) - mean( self$weights*(A*v*(Q1 - Q1beta)))) - }) %*% scale_inv - }, error = function(...){ + (self$weights*(A*v*(Q1 - Q1beta)) - mean( self$weights*(A*v*(Q1 - Q1beta)))) + }) + + EIFWA <- EIFWA %*% scaleinv - }) + } ,error = function(...) {} ) } return(list(Y = H, EIF = list(Y = EIF_Y, WA = EIFWA))) @@ -147,8 +154,8 @@ Param_npOR <- R6Class( # Q <- Q_packed[[3]] Q0 <- bound(Q0, 0.0005) Q1 <- bound(Q1, 0.0005) - beta <- get_beta(W, A, self$formula_OR, Q1, Q0, family = binomial(), weights = weights) - V <- model.matrix(self$formula_OR, as.data.frame(W)) + beta <- get_beta(W, A, self$formula_logOR, Q1, Q0, family = binomial(), weights = weights) + V <- model.matrix(self$formula_logOR, as.data.frame(W)) OR <- exp(V%*%beta) IC <- EIF @@ -177,8 +184,8 @@ Param_npOR <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_OR = function(){ - return(private$.formula_OR) + formula_logOR = function(){ + return(private$.formula_logOR) } ), private = list( @@ -186,7 +193,7 @@ Param_npOR <- R6Class( .cf_likelihood_treatment = NULL, .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, - .formula_OR = NULL, + .formula_logOR = NULL, .submodel = list(Y = "gaussian_identity") ) ) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index d07f32c7..4a342e9e 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -1,6 +1,7 @@ -#' Average Treatment Effect -#' -#' Parameter definition for the Average Treatment Effect (ATE). +#' Semiparametric estimation of the conditonal average treatment effect for arbitrary partially-linear least-squares regression models. +#' This is a semiparametric version of \code{Param_npCATT} and \code{Param_npCATE} where the parametric model for the CATE is assumed correct. +#' Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +#' Note a linear-link is used for the CATE. #' @importFrom R6 R6Class #' @importFrom uuid UUIDgenerate #' @importFrom methods is diff --git a/R/Param_spOR.R b/R/Param_spOR.R index 0ae0dbcf..75c18856 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -1,6 +1,7 @@ -#' Average Treatment Effect -#' -#' Parameter definition for the Average Treatment Effect (ATE). +#' Semiparametric estimation of the conditonal odds ratio for arbitrary partially-linear logistic regression models. +#' This is a semiparametric version of \code{Param_npOR} where the parametric model for the OR is assumed correct. +#' Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +#' The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the `log`-conditional odds ratio. #' @importFrom R6 R6Class #' @importFrom uuid UUIDgenerate #' @importFrom methods is @@ -17,7 +18,7 @@ #' \describe{ #' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood #' } -#' \item{\code{formula_OR}}{... +#' \item{\code{formula_logOR}}{... #' } #' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. #' } @@ -48,7 +49,7 @@ Param_spOR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_OR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_logOR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -57,7 +58,7 @@ Param_spOR <- R6Class( intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) intervention_list_control <- c(intervention_list_control, censoring_intervention) } - private$.formula_OR <- formula_OR + private$.formula_logOR <- formula_logOR private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, @@ -75,7 +76,7 @@ Param_spOR <- R6Class( intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) W <- tmle_task$get_tmle_node("W") - V <- model.matrix(self$formula_OR, as.data.frame(W)) + V <- model.matrix(self$formula_logOR, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) @@ -91,10 +92,10 @@ Param_spOR <- R6Class( Qorig <- Q Q0 <- bound(Q0, 0.005) Q1 <- bound(Q1, 0.005) - OR <- Q1*(1-Q1) / (Q0*(1-Q0)) + sigma_rel <- Q1*(1-Q1) / (Q0*(1-Q0)) - h_star <- -1*as.vector((g1*OR) / (g1*OR + (1-g1))) + h_star <- -1*as.vector((g1*sigma_rel) / (g1*sigma_rel + (1-g1))) H <- as.matrix(V*(A + h_star)) # Store EIF component @@ -137,8 +138,8 @@ Param_spOR <- R6Class( # Q <- Q_packed[[3]] Q0 <- bound(Q0, 0.0005) Q1 <- bound(Q1, 0.0005) - beta <- get_beta(W, A, self$formula_OR, Q1, Q0, family = binomial(), weights = weights) - V <- model.matrix(self$formula_OR, as.data.frame(W)) + beta <- get_beta(W, A, self$formula_logOR, Q1, Q0, family = binomial(), weights = weights) + V <- model.matrix(self$formula_logOR, as.data.frame(W)) OR <- exp(V%*%beta) IC <- EIF @@ -167,8 +168,8 @@ Param_spOR <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_OR = function(){ - return(private$.formula_OR) + formula_logOR = function(){ + return(private$.formula_logOR) } ), private = list( @@ -176,7 +177,7 @@ Param_spOR <- R6Class( .cf_likelihood_treatment = NULL, .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, - .formula_OR = NULL, + .formula_logOR = NULL, .submodel = list(Y = "gaussian_identity") ) ) diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 0e8c1c73..bdfb9711 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -1,6 +1,8 @@ -#' Average Treatment Effect -#' -#' Parameter definition for the Average Treatment Effect (ATE). +#' Semiparametric estimation of the conditonal relative risk/treatment-effect for arbitrary partially-linear log-linear/link regression models. +#' Arbitrary user-specified parametric models for the conditional relative-risk are supported. +#` This method implements semiparametric efficient relative-risk regression for nonnegative outcomes. +#' Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +#' The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the `log`-relative-risk. #' @importFrom R6 R6Class #' @importFrom uuid UUIDgenerate #' @importFrom methods is @@ -17,7 +19,7 @@ #' \describe{ #' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood #' } -#' \item{\code{formula_RR}}{... +#' \item{\code{formula_logRR}}{... #' } #' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. #' } @@ -48,7 +50,7 @@ Param_spRR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_RR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_logRR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -57,7 +59,7 @@ Param_spRR <- R6Class( intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) intervention_list_control <- c(intervention_list_control, censoring_intervention) } - private$.formula_RR <- formula_RR + private$.formula_logRR <- formula_logRR private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, @@ -75,7 +77,7 @@ Param_spRR <- R6Class( intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) W <- tmle_task$get_tmle_node("W") - V <- model.matrix(self$formula_RR, as.data.frame(W)) + V <- model.matrix(self$formula_logRR, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] @@ -144,8 +146,8 @@ Param_spRR <- R6Class( Q0 <- pmax(Q0, 0.0005) Q1 <- pmax(Q1, 0.0005) - beta <- get_beta(W, A, self$formula_RR, Q1, Q0, family = poisson(), weights = weights) - V <- model.matrix(self$formula_RR, as.data.frame(W)) + beta <- get_beta(W, A, self$formula_logRR, Q1, Q0, family = poisson(), weights = weights) + V <- model.matrix(self$formula_logRR, as.data.frame(W)) RR <- exp(V%*%beta) IC <- as.matrix(EIF) @@ -174,8 +176,8 @@ Param_spRR <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_RR = function(){ - return(private$.formula_RR) + formula_logRR = function(){ + return(private$.formula_logRR) } ), private = list( @@ -183,7 +185,7 @@ Param_spRR <- R6Class( .cf_likelihood_treatment = NULL, .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, - .formula_RR = NULL, + .formula_logRR = NULL, .submodel = list(Y = "poisson_log") ) ) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 1d1b02f1..53981f6d 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -41,13 +41,13 @@ tmle3_Spec_npCausalGLM <- R6Class( return(likelihood) }, - make_updater = function(convergence_type = "sample_size", verbose = TRUE,...) { + make_updater = function(convergence_type = "sample_size", verbose = F,...) { if(self$options$estimand == "CATE" || self$options$estimand == "CATT"){ updater <- tmle3_Update$new(maxit=100,one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.001, constrain_step = TRUE, bounds = 0.0025, ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.001, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) } return(updater) }, diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 17ee032b..b8b9169b 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -43,13 +43,13 @@ tmle3_Spec_spCausalGLM <- R6Class( return(likelihood) }, - make_updater = function(convergence_type = "sample_size", verbose = TRUE,...) { + make_updater = function(convergence_type = "sample_size", verbose = F,...) { if(self$options$estimand == "CATE"){ updater <- tmle3_Update$new(maxit=100,one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.001, constrain_step = TRUE, bounds = 0.0025, ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.001, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) } return(updater) }, diff --git a/man/Param_npCATE.Rd b/man/Param_npCATE.Rd index 66e07e50..fbe47726 100644 --- a/man/Param_npCATE.Rd +++ b/man/Param_npCATE.Rd @@ -2,7 +2,11 @@ % Please edit documentation in R/Param_npCATE.R \name{Param_npCATE} \alias{Param_npCATE} -\title{Average Treatment Effect} +\title{Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +This can be used to assess heterogeneity of the average treatment effect. +We note that \code{formula_CATE = ~ 1} gives an estimator of the nonparametric average treatment effect (ATE).} \format{ \code{\link{R6Class}} object. } diff --git a/man/Param_npCATT.Rd b/man/Param_npCATT.Rd index 026bf511..2571d240 100644 --- a/man/Param_npCATT.Rd +++ b/man/Param_npCATT.Rd @@ -2,7 +2,10 @@ % Please edit documentation in R/Param_npCATT.R \name{Param_npCATT} \alias{Param_npCATT} -\title{Average Treatment Effect} +\title{Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +The true conditional average treatment effect is projected onto a parametric working model using only individuals with \code{A=1} (among the treated). +This can be used to assess heterogeneity of the average treatment effect and avoids positivity issues by focusing on best approximating the conditional average treatment effect ampng the treated. +We note that \code{formula_CATT = ~ 1} gives an estimator of the nonparametric average treatment effect among the treated (ATT).} \format{ \code{\link{R6Class}} object. } @@ -10,7 +13,10 @@ \code{Param_base} object } \description{ -Parameter definition for the Average Treatment Effect (ATE). +Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +The true conditional average treatment effect is projected onto a parametric working model using only individuals with \code{A=1} (among the treated). +This can be used to assess heterogeneity of the average treatment effect and avoids positivity issues by focusing on best approximating the conditional average treatment effect ampng the treated. +We note that \code{formula_CATT = ~ 1} gives an estimator of the nonparametric average treatment effect among the treated (ATT). } \section{Constructor}{ diff --git a/man/Param_npOR.Rd b/man/Param_npOR.Rd index b3677b99..a9f9ef15 100644 --- a/man/Param_npOR.Rd +++ b/man/Param_npOR.Rd @@ -2,7 +2,12 @@ % Please edit documentation in R/Param_npOR.R \name{Param_npOR} \alias{Param_npOR} -\title{Average Treatment Effect} +\title{Nonparametric inference for user-specified parametric working models for the conditional odds ratio between two binary variables +The true conditional odds ratio is projected onto a parametric working model using logistic-regression. +This can be used to assess heterogeneity of the odds ratio. +We note that \code{formula_logOR = ~ 1} gives an estimator of the nonparametric marginal odds ratio among the treated. +The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the \code{log}-conditional odds ratio. +Parameter definition for the Average Treatment Effect (ATE).} \format{ \code{\link{R6Class}} object. } @@ -10,6 +15,11 @@ \code{Param_base} object } \description{ +Nonparametric inference for user-specified parametric working models for the conditional odds ratio between two binary variables +The true conditional odds ratio is projected onto a parametric working model using logistic-regression. +This can be used to assess heterogeneity of the odds ratio. +We note that \code{formula_logOR = ~ 1} gives an estimator of the nonparametric marginal odds ratio among the treated. +The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the \code{log}-conditional odds ratio. Parameter definition for the Average Treatment Effect (ATE). } \section{Constructor}{ @@ -19,7 +29,7 @@ Parameter definition for the Average Treatment Effect (ATE). \describe{ \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood } -\item{\code{formula_OR}}{... +\item{\code{formula_logOR}}{... } \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. } diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd index 53574037..c01202e1 100644 --- a/man/Param_spCATE.Rd +++ b/man/Param_spCATE.Rd @@ -2,7 +2,10 @@ % Please edit documentation in R/Param_spCATE.R \name{Param_spCATE} \alias{Param_spCATE} -\title{Average Treatment Effect} +\title{Semiparametric estimation of the conditonal average treatment effect for arbitrary partially-linear least-squares regression models. +This is a semiparametric version of \code{Param_npCATT} and \code{Param_npCATE} where the parametric model for the CATE is assumed correct. +Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +Note a linear-link is used for the CATE.} \format{ \code{\link{R6Class}} object. } @@ -10,7 +13,10 @@ \code{Param_base} object } \description{ -Parameter definition for the Average Treatment Effect (ATE). +Semiparametric estimation of the conditonal average treatment effect for arbitrary partially-linear least-squares regression models. +This is a semiparametric version of \code{Param_npCATT} and \code{Param_npCATE} where the parametric model for the CATE is assumed correct. +Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +Note a linear-link is used for the CATE. } \section{Constructor}{ diff --git a/man/Param_spOR.Rd b/man/Param_spOR.Rd index 40311507..e86a5b9d 100644 --- a/man/Param_spOR.Rd +++ b/man/Param_spOR.Rd @@ -2,7 +2,10 @@ % Please edit documentation in R/Param_spOR.R \name{Param_spOR} \alias{Param_spOR} -\title{Average Treatment Effect} +\title{Semiparametric estimation of the conditonal odds ratio for arbitrary partially-linear logistic regression models. +This is a semiparametric version of \code{Param_npOR} where the parametric model for the OR is assumed correct. +Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the \code{log}-conditional odds ratio.} \format{ \code{\link{R6Class}} object. } @@ -10,7 +13,10 @@ \code{Param_base} object } \description{ -Parameter definition for the Average Treatment Effect (ATE). +Semiparametric estimation of the conditonal odds ratio for arbitrary partially-linear logistic regression models. +This is a semiparametric version of \code{Param_npOR} where the parametric model for the OR is assumed correct. +Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the \code{log}-conditional odds ratio. } \section{Constructor}{ @@ -19,7 +25,7 @@ Parameter definition for the Average Treatment Effect (ATE). \describe{ \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood } -\item{\code{formula_OR}}{... +\item{\code{formula_logOR}}{... } \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. } diff --git a/man/Param_spRR.Rd b/man/Param_spRR.Rd index b666d1e5..eebf35c8 100644 --- a/man/Param_spRR.Rd +++ b/man/Param_spRR.Rd @@ -2,7 +2,10 @@ % Please edit documentation in R/Param_spRR.R \name{Param_spRR} \alias{Param_spRR} -\title{Average Treatment Effect} +\title{Semiparametric estimation of the conditonal relative risk/treatment-effect for arbitrary partially-linear log-linear/link regression models. +Arbitrary user-specified parametric models for the conditional relative-risk are supported. +Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the \code{log}-relative-risk.} \format{ \code{\link{R6Class}} object. } @@ -10,7 +13,10 @@ \code{Param_base} object } \description{ -Parameter definition for the Average Treatment Effect (ATE). +Semiparametric estimation of the conditonal relative risk/treatment-effect for arbitrary partially-linear log-linear/link regression models. +Arbitrary user-specified parametric models for the conditional relative-risk are supported. +Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. +The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the \code{log}-relative-risk. } \section{Constructor}{ @@ -19,7 +25,7 @@ Parameter definition for the Average Treatment Effect (ATE). \describe{ \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood } -\item{\code{formula_RR}}{... +\item{\code{formula_logRR}}{... } \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. } diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index f340b36e..c9797501 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -11,8 +11,14 @@ knitr::opts_chunk$set(echo = TRUE) ```{r} -library(sl3) -n <- 200 +passes <- c() +passes1 <- c() +passes2 <- c() + +for(i in 1:100){ + print(i) + +n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rnorm(n, mean = A+W, sd = 0.3) @@ -25,37 +31,62 @@ learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) # spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") # out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATE") -out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) -out +suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary +passes <- c(passes , out$lower <= 1 & out$upper >= 1) + spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATT") -out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) -out +suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary +passes1 <- c(passes1 , out$lower <= 1 & out$upper >= 1) + spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") -out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) -out +suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary +passes2 <- c(passes2 , out$lower <= 1 & out$upper >= 1) + +print(mean(passes)) +print(mean(passes1)) +print(mean(passes2)) +} ``` -```{r} +```{r, include = F} + +passes <- c() +passes1 <- c() +for(i in 1:100){ + print(i) library(sl3) -n <- 200 +n <- 500 W <- runif(n, -1, 1) -A <- rbinom(n, size = 1, prob = plogis(W)) +A <- rbinom(n, size = 1, prob = plogis(0)) Y <- rbinom(n, size = 1, prob = plogis(A + W)) + quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) data <- data.table(W,A,Y) lrnr_Y0W <- Lrnr_glmnet$new() lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "OR") -out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) -out + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) + out <- out$summary +passes <- c(passes , out$lower <= 1 & out$upper >= 1) + +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "OR") +suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary +passes1 <- c(passes1 , out$lower <= 1 & out$upper >= 1) +print(mean(passes)) +print(mean(passes1)) +} ``` From 6bd79b4518c55b49cbf1263ca9ea32c512380398 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 11:48:13 -0700 Subject: [PATCH 20/65] testing --- R/Lrnr_glm_semiparametric.R | 8 +++---- R/Param_npCATE.R | 8 +++---- R/Param_npCATT.R | 6 ++--- R/Param_npOR.R | 12 +++++----- R/Param_spCATE.R | 12 +++++----- R/Param_spOR.R | 22 +++++++++---------- R/Param_spRR.R | 16 +++++++------- R/tmle3_spec_spCausalGLM.R | 4 +++- man/Lrnr_glm_semiparametric.Rd | 2 +- tests/test-spnpCATECATT.R | 40 ++++++++++++++++++++++++++++++++++ tests/test-spnpOR.R | 29 ++++++++++++++++++++++++ tests/testthat/test-spRR.R | 26 ++++++++++++++++++++++ vignettes/testing.Rmd | 37 ++++++++++++++++++------------- 13 files changed, 163 insertions(+), 59 deletions(-) create mode 100644 tests/test-spnpCATECATT.R create mode 100644 tests/test-spnpOR.R create mode 100644 tests/testthat/test-spRR.R diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index 2eea612f..6e757fef 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -2,7 +2,7 @@ #' #' This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and #' \code{\link[stats]{glm.fit}}. It supports models of the form `linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)` where `A` is a binary or continuous interaction variable, -#' and `f(W)` is a user-specified parametric function (e.g. `f(W) = model.matrix(formula_sp, W)`). +#' and `f(W)` is a user-specified parametric function (e.g. `f(W) = model.matrix(formula_sp, W)`). The baseline function `E[Y|A=0,W]` is fit using a user-specified \code{sl3}-Learner (possibly pooled over values of `A` and then projected onto the semiparametric model). #' #' @docType class #' @@ -152,9 +152,9 @@ Lrnr_glm_semiparametric <- R6Class( task_baseline0 <- task$next_in_chain(covariates = colnames(X0), column_names = column_names ) Q0 <- lrnr_baseline$predict(task_baseline0) } - - Q1 <- family$linkinv(family$linkfun(Q0) + V%*%beta) - Q <- family$linkinv(family$linkfun(Q0) + A*V%*%beta) + Q0 <- as.vector(Q0) + Q1 <- as.vector(family$linkinv(family$linkfun(Q0) + V%*%beta)) + Q <- as.vector(family$linkinv(family$linkfun(Q0) + A*V%*%beta)) if(self$params$return_matrix_predictions && binary) { predictions <- cbind(Q0,Q1,Q) colnames(predictions) <- c("A=0", "A=1", "A") diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 64dcfcb7..99c37d13 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -96,11 +96,11 @@ Param_npCATE <- R6Class( g1 <- ifelse(A==1, g, 1-g) g0 <- 1-g1 - Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) - Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number)) beta <- coef(glm.fit(V_train, Q1-Q0, family = gaussian(), weights = self$weights)) - CATE <- V %*% beta + CATE <- as.vector(V %*% beta) # var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) # var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) # var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index a09fde68..a7005e9a 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -93,9 +93,9 @@ Param_npCATT <- R6Class( g1 <- ifelse(A==1, g, 1-g) g0 <- 1-g1 - Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) - Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number)) beta <- get_beta(W_train, A_train, self$formula_CATT, Q1, Q0, family = gaussian(), weights = self$weights) # var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 793f2ff7..643b237d 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -90,20 +90,20 @@ Param_npOR <- R6Class( g1 <- ifelse(A==1, g, 1-g) g0 <- 1-g1 - Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) - Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number)) Qorig <- Q Q0 <- bound(Q0, 0.005) Q1 <- bound(Q1, 0.005) beta <- get_beta(W_train, A_train, self$formula_logOR, Q1, Q0, family = binomial(), weights = self$weights) - Q1beta <- plogis(qlogis(Q0) + V%*%beta) + Q1beta <- as.vector(plogis(qlogis(Q0) + V%*%beta)) sigma_rel <- Q1beta*(1-Q1beta) / (Q0*(1-Q0)) - omega <- (g0 + g1*sigma_rel) / (g0) + omega <- as.vector((g0 + g1*sigma_rel) / (g0)) h_star <- -1*as.vector((g1*sigma_rel) / (g1*sigma_rel + (1-g1))) H <- as.matrix(omega*V*(A + h_star)) @@ -115,7 +115,7 @@ Param_npOR <- R6Class( tryCatch({ scale <- apply(V,2, function(v){apply(self$weights*(A * Q1beta*(1-Q1beta) * v*V),2,mean)}) scaleinv <- solve(scale) - EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) + EIF_Y <- self$weights * (H%*% scaleinv) * as.vector(Y-Q) EIFWA <- apply(V, 2, function(v) { (self$weights*(A*v*(Q1 - Q1beta)) - mean( self$weights*(A*v*(Q1 - Q1beta)))) }) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index 4a342e9e..767a309a 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -92,17 +92,17 @@ Param_spCATE <- R6Class( #Q0 <- Q_packed[[1]] #Q1 <- Q_packed[[2]] #Q <- Q_packed[[3]] - Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) - Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number)) #print(data.table(Q0,Q1,Q)) #Extract current semiparametric coef #print(data.table(Q1,Q0)) #beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) # Get conditional variances - var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) - var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) - var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) + var_Y <- as.vector(self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number)) + var_Y0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number)) + var_Y1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number)) gradM <- V num <- gradM * ( g1/var_Y1) diff --git a/R/Param_spOR.R b/R/Param_spOR.R index 75c18856..a3f7b56d 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -79,16 +79,16 @@ Param_spOR <- R6Class( V <- model.matrix(self$formula_logOR, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] - g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g <- self$observed_likelihood$get_likelihood(tmle_task, "A", fold_number) g1 <- ifelse(A==1, g, 1-g) g0 <- 1-g1 #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) #Q0 <- Q_packed[[1]] #Q1 <- Q_packed[[2]] #Q <- Q_packed[[3]] - Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) - Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Q <- as.vector(self$observed_likelihood$get_likelihood(tmle_task, "Y", fold_number)) + Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task0, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task1, "Y", fold_number)) Qorig <- Q Q0 <- bound(Q0, 0.005) Q1 <- bound(Q1, 0.005) @@ -104,9 +104,8 @@ Param_spOR <- R6Class( tryCatch({ scale <- apply(V,2, function(v){apply(self$weights*as.vector( Q1*(1-Q1) * Q0*(1-Q0) * g1 * (1-g1) / (g1 * Q1*(1-Q1) + (1-g1) *Q0*(1-Q0) )) * v*V,2,mean)}) scaleinv <- solve(scale) - EIF_Y <- self$weights * (H%*% scaleinv) * (Y-Q) - }, error = function(...){ + EIF_Y <- self$weights * (H%*% scaleinv) * as.vector(Y-Q) }) } @@ -125,9 +124,9 @@ Param_spOR <- R6Class( weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF$Y - Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) - Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Q <- self$observed_likelihood$get_likelihood(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihood(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihood(cf_task1, "Y", fold_number) Qtest <- ifelse(A==1, Q1, Q0) if(!all(Qtest-Q==0)) { stop("Q and Q1,Q0 dont match") @@ -140,9 +139,10 @@ Param_spOR <- R6Class( Q1 <- bound(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logOR, Q1, Q0, family = binomial(), weights = weights) V <- model.matrix(self$formula_logOR, as.data.frame(W)) - OR <- exp(V%*%beta) + OR <- as.vector(exp(V%*%beta)) + + IC <- as.matrix(EIF) - IC <- EIF result <- list(psi = beta, IC = IC, OR = OR) return(result) diff --git a/R/Param_spRR.R b/R/Param_spRR.R index bdfb9711..f9d3461d 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -81,22 +81,22 @@ Param_spRR <- R6Class( A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] - g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g <- self$observed_likelihood$get_likelihood(tmle_task, "A", fold_number) g1 <- ifelse(A==1, g, 1-g) g0 <- 1-g1 #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) #Q0 <- Q_packed[[1]] #Q1 <- Q_packed[[2]] #Q <- Q_packed[[3]] - Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) - Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) - Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + Q <- as.vector(self$observed_likelihood$get_likelihood(tmle_task, "Y", fold_number)) + Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task0, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task1, "Y", fold_number)) Qorig <- Q Q0 <- pmax(Q0, 0.005) Q1 <- pmax(Q1, 0.005) - RR <- Q1/Q0 + RR <- as.vector(Q1/Q0) gradM <- V mstar <- RR + (1-A)*1 num <- gradM * ( RR * g1) @@ -107,10 +107,10 @@ Param_spRR <- R6Class( # Store EIF component EIF_Y <- NULL if(is_training_task) { - scale <- apply(V,2, function(v) { - apply(self$weights*V*v*g1*g0*RR/(g1*RR + g0)^2 *(Y-Q) + H*(A*v*Q),2,mean) + apply(self$weights*V*v*g1*g0*RR/(g1*RR + g0)^2 *(Y-Q) + self$weights*H*(A*v*Q),2,mean) }) + scaleinv <- solve(scale) EIF_Y <- as.matrix(self$weights * (H%*% scaleinv) * (Y-Q)) @@ -148,7 +148,7 @@ Param_spRR <- R6Class( Q1 <- pmax(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logRR, Q1, Q0, family = poisson(), weights = weights) V <- model.matrix(self$formula_logRR, as.data.frame(W)) - RR <- exp(V%*%beta) + RR <- as.vector(exp(V%*%beta)) IC <- as.matrix(EIF) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index b8b9169b..26f483be 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -24,7 +24,9 @@ tmle3_Spec_spCausalGLM <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types include_variance_node <- self$options$estimand == "CATE" - + if(self$options$estimand == "RR") { + variable_types <- list(Y = variable_type("continuous")) + } tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) return(tmle_task) diff --git a/man/Lrnr_glm_semiparametric.Rd b/man/Lrnr_glm_semiparametric.Rd index 2ad52413..808fbceb 100644 --- a/man/Lrnr_glm_semiparametric.Rd +++ b/man/Lrnr_glm_semiparametric.Rd @@ -14,7 +14,7 @@ Learner object with methods for training and prediction. See \description{ This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and \code{\link[stats]{glm.fit}}. It supports models of the form \verb{linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)} where \code{A} is a binary or continuous interaction variable, -and \code{f(W)} is a user-specified parametric function (e.g. \code{f(W) = model.matrix(formula_sp, W)}). +and \code{f(W)} is a user-specified parametric function (e.g. \code{f(W) = model.matrix(formula_sp, W)}). The baseline function \verb{E[Y|A=0,W]} is fit using a user-specified \code{sl3}-Learner (possibly pooled over values of \code{A} and then projected onto the semiparametric model). } \section{Parameters}{ diff --git a/tests/test-spnpCATECATT.R b/tests/test-spnpCATECATT.R new file mode 100644 index 00000000..59c8b707 --- /dev/null +++ b/tests/test-spnpCATECATT.R @@ -0,0 +1,40 @@ +passes <- c() +passes1 <- c() +passes2 <- c() + +for(i in 1:1){ + print(i) + + n <- 500 + W <- runif(n, -1, 1) + A <- rbinom(n, size = 1, prob = plogis(W)) + Y <- rnorm(n, mean = A+W, sd = 0.3) + data <- data.table(W,A,Y) + lrnr_Y0W <- Lrnr_glmnet$new() + lrnr_A <- Lrnr_glm$new() + + node_list <- list (W = "W", A = "A", Y= "Y") + learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) + # spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") + # out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) + spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATE") + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + passes <- c(passes , out$lower <= 1 & out$upper >= 1) + + + spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATT") + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + passes1 <- c(passes1 , out$lower <= 1 & out$upper >= 1) + + + spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + passes2 <- c(passes2 , out$lower <= 1 & out$upper >= 1) + + print(mean(passes)) + print(mean(passes1)) + print(mean(passes2)) +} diff --git a/tests/test-spnpOR.R b/tests/test-spnpOR.R new file mode 100644 index 00000000..042ee299 --- /dev/null +++ b/tests/test-spnpOR.R @@ -0,0 +1,29 @@ +passes <- c() +passes1 <- c() +for(i in 1:1){ + print(i) + library(sl3) + n <- 500 + W <- runif(n, -1, 1) + A <- rbinom(n, size = 1, prob = plogis(0)) + Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) + quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) + data <- data.table(W,A,Y) + lrnr_Y0W <- Lrnr_glmnet$new() + lrnr_A <- Lrnr_glm$new() + node_list <- list (W = "W", A = "A", Y= "Y") + learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) + spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) + out <- out$summary + passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) + + + spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "OR") + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) + + print(rowMeans(passes)) + print(rowMeans(passes1)) +} diff --git a/tests/testthat/test-spRR.R b/tests/testthat/test-spRR.R new file mode 100644 index 00000000..65246428 --- /dev/null +++ b/tests/testthat/test-spRR.R @@ -0,0 +1,26 @@ + + + + + +passes <- c() +for(i in 1:1){ + print(i) + n <- 500 + W <- runif(n, -1, 1) + A <- rbinom(n, size = 1, prob = plogis(W)) + Y <- rpois(n, exp(A + A*W + W)) + data <- data.table(W,A,Y) + data + lrnr_Y0W <- Lrnr_glmnet$new(family = "poisson") + lrnr_A <- Lrnr_glm$new() + + node_list <- list (W = "W", A = "A", Y= "Y") + learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) + spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "RR") + out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) + print(rowMeans(passes)) + +} diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index c9797501..ae1afed6 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -66,45 +66,52 @@ library(sl3) n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(0)) -Y <- rbinom(n, size = 1, prob = plogis(A + W)) +Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) data <- data.table(W,A,Y) lrnr_Y0W <- Lrnr_glmnet$new() lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) -spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "OR") +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary -passes <- c(passes , out$lower <= 1 & out$upper >= 1) +passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) -spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "OR") +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "OR") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary -passes1 <- c(passes1 , out$lower <= 1 & out$upper >= 1) +passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) -print(mean(passes)) -print(mean(passes1)) +print(rowMeans(passes)) +print(rowMeans(passes1)) } ``` ```{r} library(sl3) -n <- 200 + +passes <- c() +for(i in 1:100){ + print(i) +n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) -Y <- rpois(n, exp(A + W)) +Y <- rpois(n, exp(A + A*W + W)) data <- data.table(W,A,Y) data -lrnr_Y0W <- Lrnr_glmnet$new(family = poisson()) +lrnr_Y0W <- Lrnr_glmnet$new(family = "poisson") lrnr_A <- Lrnr_glm$new() -lrnr_sp <- Lrnr_glm_semiparametric$new(formula_sp=~1 , lrnr_Y0W, interaction_variable = "A", family = poisson(), return_matrix_predictions = FALSE) + node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) -spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "RR") -out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) -out$updater$bounds("Y") -out +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "RR") +out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary +passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) +print(rowMeans(passes)) + +} ``` From 0f15fc086140bf242d957b0d2a091a77bd66e7db Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 11:53:48 -0700 Subject: [PATCH 21/65] hi --- tests/testthat/test-spRR.R | 1 + tests/{ => testthat}/test-spnpCATECATT.R | 3 +++ tests/{ => testthat}/test-spnpOR.R | 2 ++ vignettes/testing.Rmd | 32 ++++++++++++------------ 4 files changed, 22 insertions(+), 16 deletions(-) rename tests/{ => testthat}/test-spnpCATECATT.R (96%) rename tests/{ => testthat}/test-spnpOR.R (97%) diff --git a/tests/testthat/test-spRR.R b/tests/testthat/test-spRR.R index 65246428..76286a5b 100644 --- a/tests/testthat/test-spRR.R +++ b/tests/testthat/test-spRR.R @@ -1,4 +1,5 @@ +context("spRR test") diff --git a/tests/test-spnpCATECATT.R b/tests/testthat/test-spnpCATECATT.R similarity index 96% rename from tests/test-spnpCATECATT.R rename to tests/testthat/test-spnpCATECATT.R index 59c8b707..98ffa58f 100644 --- a/tests/test-spnpCATECATT.R +++ b/tests/testthat/test-spnpCATECATT.R @@ -1,3 +1,6 @@ +context("spCATE, npCATE, npCATT test") + + passes <- c() passes1 <- c() passes2 <- c() diff --git a/tests/test-spnpOR.R b/tests/testthat/test-spnpOR.R similarity index 97% rename from tests/test-spnpOR.R rename to tests/testthat/test-spnpOR.R index 042ee299..ad03487b 100644 --- a/tests/test-spnpOR.R +++ b/tests/testthat/test-spnpOR.R @@ -1,3 +1,5 @@ +context("spnpOR test") + passes <- c() passes1 <- c() for(i in 1:1){ diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index ae1afed6..023c07c9 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -15,41 +15,41 @@ passes <- c() passes1 <- c() passes2 <- c() -for(i in 1:100){ +for(i in 1:200){ print(i) n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) -Y <- rnorm(n, mean = A+W, sd = 0.3) +Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_glmnet$new() +lrnr_Y0W <- Lrnr_glm$new() lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) # spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") # out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) -spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATE") +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATE") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary -passes <- c(passes , out$lower <= 1 & out$upper >= 1) +passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) -spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATT") +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATT") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary -passes1 <- c(passes1 , out$lower <= 1 & out$upper >= 1) +passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) -spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "CATE") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary -passes2 <- c(passes2 , out$lower <= 1 & out$upper >= 1) +passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) -print(mean(passes)) -print(mean(passes1)) -print(mean(passes2)) +print(rowMeans(passes)) +print(rowMeans(passes1)) +print(rowMeans(passes2)) } ``` @@ -60,7 +60,7 @@ print(mean(passes2)) passes <- c() passes1 <- c() -for(i in 1:100){ +for(i in 1:200){ print(i) library(sl3) n <- 500 @@ -69,7 +69,7 @@ A <- rbinom(n, size = 1, prob = plogis(0)) Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_glmnet$new() +lrnr_Y0W <- Lrnr_glm$new() lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) @@ -94,7 +94,7 @@ print(rowMeans(passes1)) library(sl3) passes <- c() -for(i in 1:100){ +for(i in 1:200){ print(i) n <- 500 W <- runif(n, -1, 1) @@ -102,7 +102,7 @@ A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rpois(n, exp(A + A*W + W)) data <- data.table(W,A,Y) data -lrnr_Y0W <- Lrnr_glmnet$new(family = "poisson") +lrnr_Y0W <- Lrnr_glm$new(family = "poisson") lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") From b0174188b63893b10c8e5b0ec72b10b1be79c0d0 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 12:09:50 -0700 Subject: [PATCH 22/65] more tests --- vignettes/testing.Rmd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 023c07c9..d18b1c53 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -23,8 +23,8 @@ W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_glm$new() -lrnr_A <- Lrnr_glm$new() +lrnr_Y0W <- Lrnr_gam$new() +lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) @@ -69,8 +69,8 @@ A <- rbinom(n, size = 1, prob = plogis(0)) Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_glm$new() -lrnr_A <- Lrnr_glm$new() +lrnr_Y0W <- Lrnr_gam$new() +lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") @@ -102,8 +102,8 @@ A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rpois(n, exp(A + A*W + W)) data <- data.table(W,A,Y) data -lrnr_Y0W <- Lrnr_glm$new(family = "poisson") -lrnr_A <- Lrnr_glm$new() +lrnr_Y0W <- Lrnr_gam$new(family = poisson()) +lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) From eaff276a85fdd51af90fbfdbba487afa8d48056a Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 12:12:14 -0700 Subject: [PATCH 23/65] more tests --- tests/testthat/test-spRR.R | 4 ++-- tests/testthat/test-spnpCATECATT.R | 4 ++-- tests/testthat/test-spnpOR.R | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-spRR.R b/tests/testthat/test-spRR.R index 76286a5b..ec85432a 100644 --- a/tests/testthat/test-spRR.R +++ b/tests/testthat/test-spRR.R @@ -13,8 +13,8 @@ for(i in 1:1){ Y <- rpois(n, exp(A + A*W + W)) data <- data.table(W,A,Y) data - lrnr_Y0W <- Lrnr_glmnet$new(family = "poisson") - lrnr_A <- Lrnr_glm$new() + lrnr_Y0W <- Lrnr_gam$new(family = poisson()) + lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) diff --git a/tests/testthat/test-spnpCATECATT.R b/tests/testthat/test-spnpCATECATT.R index 98ffa58f..df961e85 100644 --- a/tests/testthat/test-spnpCATECATT.R +++ b/tests/testthat/test-spnpCATECATT.R @@ -13,8 +13,8 @@ for(i in 1:1){ A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rnorm(n, mean = A+W, sd = 0.3) data <- data.table(W,A,Y) - lrnr_Y0W <- Lrnr_glmnet$new() - lrnr_A <- Lrnr_glm$new() + lrnr_Y0W <- Lrnr_gam$new() + lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) diff --git a/tests/testthat/test-spnpOR.R b/tests/testthat/test-spnpOR.R index ad03487b..c8d480e2 100644 --- a/tests/testthat/test-spnpOR.R +++ b/tests/testthat/test-spnpOR.R @@ -11,8 +11,8 @@ for(i in 1:1){ Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) data <- data.table(W,A,Y) - lrnr_Y0W <- Lrnr_glmnet$new() - lrnr_A <- Lrnr_glm$new() + lrnr_Y0W <- Lrnr_gam$new() + lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") From d9f2ee32b18b9c25772c7950c85e1e429000231a Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 12:16:45 -0700 Subject: [PATCH 24/65] ran make style --- R/LF_known.R | 6 +-- R/Lrnr_glm_semiparametric.R | 67 ++++++++++++------------ R/Param_base.R | 6 +-- R/Param_npCATE.R | 44 ++++++++-------- R/Param_npCATT.R | 55 ++++++++++---------- R/Param_npOR.R | 54 ++++++++++---------- R/Param_spCATE.R | 75 ++++++++++++++------------- R/Param_spOR.R | 42 +++++++-------- R/Param_spRR.R | 45 ++++++++-------- R/helpers_point_treatment.R | 22 ++++---- R/helpers_semiparametric.R | 21 +++----- R/submodels.R | 82 ++++++++++++++++-------------- R/tmle3_Spec_npCausalGLM.R | 43 ++++++++-------- R/tmle3_Update.R | 58 ++++++++++----------- R/tmle3_spec_spCausalGLM.R | 39 +++++++------- tests/testthat/test-ATE.R | 2 +- tests/testthat/test-spRR.R | 17 +++---- tests/testthat/test-spnpCATECATT.R | 24 ++++----- tests/testthat/test-spnpOR.R | 22 ++++---- 19 files changed, 362 insertions(+), 362 deletions(-) diff --git a/R/LF_known.R b/R/LF_known.R index 7a241042..1c0c31ba 100644 --- a/R/LF_known.R +++ b/R/LF_known.R @@ -49,7 +49,7 @@ LF_known <- R6Class( }, get_mean = function(tmle_task, fold_number) { learner_task <- tmle_task$get_regression_task(self$name, scale = FALSE) - if(!is.null(self$base_likelihood)) { + if (!is.null(self$base_likelihood)) { preds <- self$mean_fun(learner_task, tmle_task, self$base_likelihood) } else { preds <- self$mean_fun(learner_task) @@ -59,7 +59,7 @@ LF_known <- R6Class( }, get_density = function(tmle_task, fold_number) { learner_task <- tmle_task$get_regression_task(self$name, scale = FALSE) - if(!is.null(self$base_likelihood)) { + if (!is.null(self$base_likelihood)) { preds <- self$density_fun(learner_task, tmle_task, self$base_likelihood) } else { preds <- self$density_fun(learner_task) @@ -89,7 +89,7 @@ LF_known <- R6Class( density_fun = function() { return(private$.density_fun) }, - base_likelihood = function(){ + base_likelihood = function() { return(private$.base_likelihood) } ), diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index 6e757fef..3a4fa81c 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -51,17 +51,16 @@ Lrnr_glm_semiparametric <- R6Class( .properties = c("continuous", "binomial", "semiparametric", "weights"), .train = function(task) { - args <- self$params append_interaction_matrix <- args$append_interaction_matrix outcome_type <- self$get_outcome_type(task) trt <- args$interaction_variable - if(is.null(trt)) { + if (is.null(trt)) { A <- rep(1, task$nrow) } else { - A <- unlist(task$get_data(,trt)) + A <- unlist(task$get_data(, trt)) } - if(!all(A %in% c(0,1)) && !is.null(trt)) { + if (!all(A %in% c(0, 1)) && !is.null(trt)) { binary <- FALSE } else { binary <- TRUE @@ -79,44 +78,44 @@ Lrnr_glm_semiparametric <- R6Class( covariates <- setdiff(task$nodes$covariates, trt) - if(!append_interaction_matrix && binary) { + if (!append_interaction_matrix && binary) { task_baseline <- task$next_in_chain(covariates = covariates) - lrnr_baseline <- lrnr_baseline$train(task_baseline[A==0]) + lrnr_baseline <- lrnr_baseline$train(task_baseline[A == 0]) Q0 <- lrnr_baseline$predict(task_baseline) - beta <- suppressWarnings(coef(glm.fit(A*V, Y, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family))) - Q1 <- family$linkinv(family$linkfun(Q0) + V%*%beta) - Q <- ifelse(A==1, Q1, Q0) + beta <- suppressWarnings(coef(glm.fit(A * V, Y, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family))) + Q1 <- family$linkinv(family$linkfun(Q0) + V %*% beta) + Q <- ifelse(A == 1, Q1, Q0) } else { - covariates <- setdiff(task$nodes$covariates, trt) - if(append_interaction_matrix) { - AV <- as.data.table(A*V) - X <- cbind(task$X[,covariates, with = F], AV) - X0 <- cbind(task$X[,covariates, with = F], 0*V) + if (append_interaction_matrix) { + AV <- as.data.table(A * V) + X <- cbind(task$X[, covariates, with = F], AV) + X0 <- cbind(task$X[, covariates, with = F], 0 * V) } else { - X <- cbind(task$X[,covariates, with = F], A) - X0 <- cbind(task$X[,covariates, with = F], A*0) + X <- cbind(task$X[, covariates, with = F], A) + X0 <- cbind(task$X[, covariates, with = F], A * 0) } column_names <- task$add_columns(X) - task_baseline <- task$next_in_chain(covariates = colnames(X), column_names = column_names ) + task_baseline <- task$next_in_chain(covariates = colnames(X), column_names = column_names) column_names <- task$add_columns(X0) - task_baseline0 <- task$next_in_chain(covariates = colnames(X0), column_names = column_names ) + task_baseline0 <- task$next_in_chain(covariates = colnames(X0), column_names = column_names) lrnr_baseline <- lrnr_baseline$train(task_baseline) Q <- lrnr_baseline$predict(task_baseline) Q0 <- lrnr_baseline$predict(task_baseline0) # Project onto model - beta <- suppressWarnings(coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family))) - + beta <- suppressWarnings(coef(glm.fit(A * V, Q, offset = family$linkfun(Q0), intercept = F, weights = task$weights, family = family))) } - fit_object = list(beta = beta, lrnr_baseline = lrnr_baseline, covariates = covariates, family = family, formula = formula, - append_interaction_matrix = append_interaction_matrix, binary = binary, task_baseline = task_baseline) + fit_object <- list( + beta = beta, lrnr_baseline = lrnr_baseline, covariates = covariates, family = family, formula = formula, + append_interaction_matrix = append_interaction_matrix, binary = binary, task_baseline = task_baseline + ) return(fit_object) }, .predict = function(task) { @@ -130,33 +129,33 @@ Lrnr_glm_semiparametric <- R6Class( formula <- fit_object$formula trt <- self$params$interaction_variable - if(is.null(trt)) { + if (is.null(trt)) { A <- rep(1, task$nrow) } else { - A <- unlist(task$get_data(,trt)) + A <- unlist(task$get_data(, trt)) } V <- model.matrix(formula, task$data) colnames(V) <- paste0("V", 1:ncol(V)) - if(!append_interaction_matrix && binary) { + if (!append_interaction_matrix && binary) { task_baseline <- task$next_in_chain(covariates = covariates) Q0 <- lrnr_baseline$predict(task_baseline) } else { - if(append_interaction_matrix) { - X0 <- cbind(task$X[,covariates, with = F], 0*V) + if (append_interaction_matrix) { + X0 <- cbind(task$X[, covariates, with = F], 0 * V) } else { - X0 <- cbind(task$X[,covariates, with = F], 0) + X0 <- cbind(task$X[, covariates, with = F], 0) } column_names <- task$add_columns(X0) - task_baseline0 <- task$next_in_chain(covariates = colnames(X0), column_names = column_names ) - Q0 <- lrnr_baseline$predict(task_baseline0) + task_baseline0 <- task$next_in_chain(covariates = colnames(X0), column_names = column_names) + Q0 <- lrnr_baseline$predict(task_baseline0) } Q0 <- as.vector(Q0) - Q1 <- as.vector(family$linkinv(family$linkfun(Q0) + V%*%beta)) - Q <- as.vector(family$linkinv(family$linkfun(Q0) + A*V%*%beta)) - if(self$params$return_matrix_predictions && binary) { - predictions <- cbind(Q0,Q1,Q) + Q1 <- as.vector(family$linkinv(family$linkfun(Q0) + V %*% beta)) + Q <- as.vector(family$linkinv(family$linkfun(Q0) + A * V %*% beta)) + if (self$params$return_matrix_predictions && binary) { + predictions <- cbind(Q0, Q1, Q) colnames(predictions) <- c("A=0", "A=1", "A") predictions <- sl3::pack_predictions(predictions) } else { diff --git a/R/Param_base.R b/R/Param_base.R index 2d84f5dc..1527b23f 100644 --- a/R/Param_base.R +++ b/R/Param_base.R @@ -52,13 +52,13 @@ Param_base <- R6Class( cat(sprintf("%s: %s\n", class(self)[1], self$name)) }, supports_submodel = function(submodel_name, node) { - if(!(node%in% names(private$.submodel))) { + if (!(node %in% names(private$.submodel))) { node <- "default" } return(submodel_name %in% c(private$.submodel[[node]])) }, get_submodel_spec = function(update_node) { - if(!(update_node%in% names(private$.submodel))) { + if (!(update_node %in% names(private$.submodel))) { update_node <- "default" } return(get_submodel_spec(private$.submodel[[update_node]])) @@ -87,7 +87,7 @@ Param_base <- R6Class( submodel = function() { return(private$.submodel) }, - weights = function(){ + weights = function() { return(self$observed_likelihood$training_task$weights) } ), diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 99c37d13..5e86cb76 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -52,12 +52,12 @@ Param_npCATE <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATE =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATE, as.data.frame(W)) - private$.targeted <- rep(T,ncol(V)) + private$.targeted <- rep(T, ncol(V)) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -71,8 +71,6 @@ Param_npCATE <- R6Class( private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { - - training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task @@ -85,42 +83,44 @@ Param_npCATE <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_CATE, as.data.frame(W)) - A <- tmle_task$get_tmle_node("A", format = T )[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] W_train <- training_task$get_tmle_node("W") V_train <- model.matrix(self$formula_CATE, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) - g1 <- ifelse(A==1, g, 1-g) - g0 <- 1-g1 + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number)) - beta <- coef(glm.fit(V_train, Q1-Q0, family = gaussian(), weights = self$weights)) + beta <- coef(glm.fit(V_train, Q1 - Q0, family = gaussian(), weights = self$weights)) CATE <- as.vector(V %*% beta) # var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) # var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) # var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) - H <- V*(A/g1 - (1-A)*(1/g0)) + H <- V * (A / g1 - (1 - A) * (1 / g0)) EIF_Y <- NULL # Store EIF component - if(is_training_task) { - scale <- apply(V,2, function(v) {apply(self$weights * V*(v ),2,mean ) }) + if (is_training_task) { + scale <- apply(V, 2, function(v) { + apply(self$weights * V * (v), 2, mean) + }) scaleinv <- solve(scale) - EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) - EIF_WA <- apply(V, 2, function(v) { - self$weights*(v*(Q1 - Q0 - CATE) - mean(self$weights*(Q1 - Q0 - CATE))) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) + EIF_WA <- apply(V, 2, function(v) { + self$weights * (v * (Q1 - Q0 - CATE) - mean(self$weights * (Q1 - Q0 - CATE))) }) %*% scaleinv # print(dim(EIF_Y)) - #print(mean(EIF_Y)) + # print(mean(EIF_Y)) } @@ -134,8 +134,8 @@ Param_npCATE <- R6Class( cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] W <- tmle_task$get_tmle_node("W") - A <- tmle_task$get_tmle_node("A", format = T )[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation @@ -144,9 +144,9 @@ Param_npCATE <- R6Class( Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) - Qtest <- ifelse(A==1, Q1, Q0) - if(!all(Qtest-Q==0)) { - print(quantile(abs(Qtest-Q))) + Qtest <- ifelse(A == 1, Q1, Q0) + if (!all(Qtest - Q == 0)) { + print(quantile(abs(Qtest - Q))) stop("Q and Q1,Q0 dont match") } # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) @@ -184,7 +184,7 @@ Param_npCATE <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_CATE = function(){ + formula_CATE = function() { return(private$.formula_CATE) } ), diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index a7005e9a..5ebc522a 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -49,12 +49,12 @@ Param_npCATT <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATT =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATT = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATT, as.data.frame(W)) - private$.targeted <- rep(T,ncol(V)) + private$.targeted <- rep(T, ncol(V)) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -68,8 +68,6 @@ Param_npCATT <- R6Class( private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { - - training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task @@ -82,16 +80,16 @@ Param_npCATT <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_CATT, as.data.frame(W)) - A <- tmle_task$get_tmle_node("A", format = T )[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] W_train <- training_task$get_tmle_node("W") V_train <- model.matrix(self$formula_CATT, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) - g1 <- ifelse(A==1, g, 1-g) - g0 <- 1-g1 + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) @@ -102,24 +100,29 @@ Param_npCATT <- R6Class( # var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) # var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) - H <- V*(A - (1-A)*(g1/g0)) + H <- V * (A - (1 - A) * (g1 / g0)) EIF_Y <- NULL EIF_WA <- NULL # Store EIF component - if(is_training_task) { - tryCatch({ - scale <- apply(V,2, function(v) {apply(self$weights *(A*v*V ),2,mean ) }) - - scaleinv <- solve(scale) - EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) - EIF_WA <- apply(V, 2, function(v) { - self$weights*(A*v*(Q1 - V%*%beta - Q0)) - mean(self$weights*(A*v*(Q1 - V%*%beta - Q0))) - }) %*% scaleinv - }, error = function(...){}) + if (is_training_task) { + tryCatch( + { + scale <- apply(V, 2, function(v) { + apply(self$weights * (A * v * V), 2, mean) + }) + + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) + EIF_WA <- apply(V, 2, function(v) { + self$weights * (A * v * (Q1 - V %*% beta - Q0)) - mean(self$weights * (A * v * (Q1 - V %*% beta - Q0))) + }) %*% scaleinv + }, + error = function(...) {} + ) # print(dim(EIF_Y)) - #print(mean(EIF_Y)) + # print(mean(EIF_Y)) } @@ -133,8 +136,8 @@ Param_npCATT <- R6Class( cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] W <- tmle_task$get_tmle_node("W") - A <- tmle_task$get_tmle_node("A", format = T )[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation @@ -143,9 +146,9 @@ Param_npCATT <- R6Class( Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) - Qtest <- ifelse(A==1, Q1, Q0) - if(!all(Qtest-Q==0)) { - print(quantile(abs(Qtest-Q))) + Qtest <- ifelse(A == 1, Q1, Q0) + if (!all(Qtest - Q == 0)) { + print(quantile(abs(Qtest - Q))) stop("Q and Q1,Q0 dont match") } # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) @@ -183,7 +186,7 @@ Param_npCATT <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_CATT = function(){ + formula_CATT = function() { return(private$.formula_CATT) } ), diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 643b237d..899d4eab 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -51,7 +51,7 @@ Param_npOR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_logOR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_logOR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -65,8 +65,6 @@ Param_npOR <- R6Class( private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { - - training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task @@ -87,8 +85,8 @@ Param_npOR <- R6Class( Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) - g1 <- ifelse(A==1, g, 1-g) - g0 <- 1-g1 + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) @@ -98,31 +96,35 @@ Param_npOR <- R6Class( Q1 <- bound(Q1, 0.005) beta <- get_beta(W_train, A_train, self$formula_logOR, Q1, Q0, family = binomial(), weights = self$weights) - Q1beta <- as.vector(plogis(qlogis(Q0) + V%*%beta)) + Q1beta <- as.vector(plogis(qlogis(Q0) + V %*% beta)) - sigma_rel <- Q1beta*(1-Q1beta) / (Q0*(1-Q0)) + sigma_rel <- Q1beta * (1 - Q1beta) / (Q0 * (1 - Q0)) - omega <- as.vector((g0 + g1*sigma_rel) / (g0)) + omega <- as.vector((g0 + g1 * sigma_rel) / (g0)) - h_star <- -1*as.vector((g1*sigma_rel) / (g1*sigma_rel + (1-g1))) - H <- as.matrix(omega*V*(A + h_star)) + h_star <- -1 * as.vector((g1 * sigma_rel) / (g1 * sigma_rel + (1 - g1))) + H <- as.matrix(omega * V * (A + h_star)) # Store EIF component EIF_Y <- NULL EIFWA <- NULL - if(is_training_task) { - tryCatch({ - scale <- apply(V,2, function(v){apply(self$weights*(A * Q1beta*(1-Q1beta) * v*V),2,mean)}) - scaleinv <- solve(scale) - EIF_Y <- self$weights * (H%*% scaleinv) * as.vector(Y-Q) - EIFWA <- apply(V, 2, function(v) { - (self$weights*(A*v*(Q1 - Q1beta)) - mean( self$weights*(A*v*(Q1 - Q1beta)))) - }) - - EIFWA <- EIFWA %*% scaleinv - - } ,error = function(...) {} ) + if (is_training_task) { + tryCatch( + { + scale <- apply(V, 2, function(v) { + apply(self$weights * (A * Q1beta * (1 - Q1beta) * v * V), 2, mean) + }) + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) + EIFWA <- apply(V, 2, function(v) { + (self$weights * (A * v * (Q1 - Q1beta)) - mean(self$weights * (A * v * (Q1 - Q1beta)))) + }) + + EIFWA <- EIFWA %*% scaleinv + }, + error = function(...) {} + ) } return(list(Y = H, EIF = list(Y = EIF_Y, WA = EIFWA))) @@ -144,8 +146,8 @@ Param_npOR <- R6Class( Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) - Qtest <- ifelse(A==1, Q1, Q0) - if(!all(Qtest-Q==0)) { + Qtest <- ifelse(A == 1, Q1, Q0) + if (!all(Qtest - Q == 0)) { stop("Q and Q1,Q0 dont match") } # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) @@ -156,7 +158,7 @@ Param_npOR <- R6Class( Q1 <- bound(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logOR, Q1, Q0, family = binomial(), weights = weights) V <- model.matrix(self$formula_logOR, as.data.frame(W)) - OR <- exp(V%*%beta) + OR <- exp(V %*% beta) IC <- EIF @@ -184,7 +186,7 @@ Param_npOR <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_logOR = function(){ + formula_logOR = function() { return(private$.formula_logOR) } ), diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index 767a309a..cc281afc 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -49,12 +49,12 @@ Param_spCATE <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATE =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATE, as.data.frame(W)) - private$.targeted <- rep(T,ncol(V)) + private$.targeted <- rep(T, ncol(V)) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -68,8 +68,6 @@ Param_spCATE <- R6Class( private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { - - training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task @@ -82,46 +80,51 @@ Param_spCATE <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_CATE, as.data.frame(W)) - A <- tmle_task$get_tmle_node("A", format = T )[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) - g1 <- ifelse(A==1, g, 1-g) - g0 <- 1-g1 - #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) - #Q0 <- Q_packed[[1]] - #Q1 <- Q_packed[[2]] - #Q <- Q_packed[[3]] + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number)) - #print(data.table(Q0,Q1,Q)) - #Extract current semiparametric coef - #print(data.table(Q1,Q0)) - #beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) + # print(data.table(Q0,Q1,Q)) + # Extract current semiparametric coef + # print(data.table(Q1,Q0)) + # beta <- get_beta(W, A, self$formula_CATE, Q1, Q0, family = gaussian(), weights = weights) # Get conditional variances var_Y <- as.vector(self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number)) var_Y0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number)) var_Y1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number)) gradM <- V - num <- gradM * ( g1/var_Y1) - denom <- (g0/ var_Y0 + g1/var_Y1) - hstar <- - num/denom - H <- as.matrix((A*gradM + hstar) /var_Y) + num <- gradM * (g1 / var_Y1) + denom <- (g0 / var_Y0 + g1 / var_Y1) + hstar <- -num / denom + H <- as.matrix((A * gradM + hstar) / var_Y) EIF_Y <- NULL # Store EIF component - if(is_training_task) { - tryCatch({ - scale <- apply(V,2, function(v) {apply(self$weights * H *(A*v ),2,mean ) }) - - scaleinv <- solve(scale) - EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y-Q) - },error = function(...){}) - - # print(dim(EIF_Y)) - #print(mean(EIF_Y)) + if (is_training_task) { + tryCatch( + { + scale <- apply(V, 2, function(v) { + apply(self$weights * H * (A * v), 2, mean) + }) + + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) + }, + error = function(...) {} + ) + + # print(dim(EIF_Y)) + # print(mean(EIF_Y)) } @@ -135,8 +138,8 @@ Param_spCATE <- R6Class( cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] W <- tmle_task$get_tmle_node("W") - A <- tmle_task$get_tmle_node("A", format = T )[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T )[[1]] + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation @@ -146,9 +149,9 @@ Param_spCATE <- R6Class( Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) - Qtest <- ifelse(A==1, Q1, Q0) - if(!all(Qtest-Q==0)) { - print(quantile(abs(Qtest-Q))) + Qtest <- ifelse(A == 1, Q1, Q0) + if (!all(Qtest - Q == 0)) { + print(quantile(abs(Qtest - Q))) stop("Q and Q1,Q0 dont match") } # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) @@ -186,7 +189,7 @@ Param_spCATE <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_CATE = function(){ + formula_CATE = function() { return(private$.formula_CATE) } ), diff --git a/R/Param_spOR.R b/R/Param_spOR.R index a3f7b56d..81f1a487 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -49,7 +49,7 @@ Param_spOR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_logOR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_logOR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -63,8 +63,6 @@ Param_spOR <- R6Class( private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { - - training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task @@ -80,33 +78,35 @@ Param_spOR <- R6Class( A <- tmle_task$get_tmle_node("A", format = TRUE)[[1]] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] g <- self$observed_likelihood$get_likelihood(tmle_task, "A", fold_number) - g1 <- ifelse(A==1, g, 1-g) - g0 <- 1-g1 - #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) - #Q0 <- Q_packed[[1]] - #Q1 <- Q_packed[[2]] - #Q <- Q_packed[[3]] + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] Q <- as.vector(self$observed_likelihood$get_likelihood(tmle_task, "Y", fold_number)) Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task0, "Y", fold_number)) Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task1, "Y", fold_number)) Qorig <- Q Q0 <- bound(Q0, 0.005) Q1 <- bound(Q1, 0.005) - sigma_rel <- Q1*(1-Q1) / (Q0*(1-Q0)) + sigma_rel <- Q1 * (1 - Q1) / (Q0 * (1 - Q0)) - h_star <- -1*as.vector((g1*sigma_rel) / (g1*sigma_rel + (1-g1))) - H <- as.matrix(V*(A + h_star)) + h_star <- -1 * as.vector((g1 * sigma_rel) / (g1 * sigma_rel + (1 - g1))) + H <- as.matrix(V * (A + h_star)) # Store EIF component EIF_Y <- NULL - if(is_training_task) { + if (is_training_task) { tryCatch({ - scale <- apply(V,2, function(v){apply(self$weights*as.vector( Q1*(1-Q1) * Q0*(1-Q0) * g1 * (1-g1) / (g1 * Q1*(1-Q1) + (1-g1) *Q0*(1-Q0) )) * v*V,2,mean)}) - scaleinv <- solve(scale) + scale <- apply(V, 2, function(v) { + apply(self$weights * as.vector(Q1 * (1 - Q1) * Q0 * (1 - Q0) * g1 * (1 - g1) / (g1 * Q1 * (1 - Q1) + (1 - g1) * Q0 * (1 - Q0))) * v * V, 2, mean) + }) + scaleinv <- solve(scale) - EIF_Y <- self$weights * (H%*% scaleinv) * as.vector(Y-Q) - }) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) + }) } return(list(Y = H, EIF = list(Y = EIF_Y))) @@ -127,8 +127,8 @@ Param_spOR <- R6Class( Q <- self$observed_likelihood$get_likelihood(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihood(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihood(cf_task1, "Y", fold_number) - Qtest <- ifelse(A==1, Q1, Q0) - if(!all(Qtest-Q==0)) { + Qtest <- ifelse(A == 1, Q1, Q0) + if (!all(Qtest - Q == 0)) { stop("Q and Q1,Q0 dont match") } # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) @@ -139,7 +139,7 @@ Param_spOR <- R6Class( Q1 <- bound(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logOR, Q1, Q0, family = binomial(), weights = weights) V <- model.matrix(self$formula_logOR, as.data.frame(W)) - OR <- as.vector(exp(V%*%beta)) + OR <- as.vector(exp(V %*% beta)) IC <- as.matrix(EIF) @@ -168,7 +168,7 @@ Param_spOR <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_logOR = function(){ + formula_logOR = function() { return(private$.formula_logOR) } ), diff --git a/R/Param_spRR.R b/R/Param_spRR.R index f9d3461d..7731dc74 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -1,6 +1,6 @@ #' Semiparametric estimation of the conditonal relative risk/treatment-effect for arbitrary partially-linear log-linear/link regression models. #' Arbitrary user-specified parametric models for the conditional relative-risk are supported. -#` This method implements semiparametric efficient relative-risk regression for nonnegative outcomes. +# ` This method implements semiparametric efficient relative-risk regression for nonnegative outcomes. #' Assuming the semiparametric model to be true allows for some efficiency gain (when true) but may lead to less robust estimates due to misspecification. #' The parametric model is at the log-scale and therefore the coefficients returned code the linear predictor for the `log`-relative-risk. #' @importFrom R6 R6Class @@ -50,7 +50,7 @@ Param_spRR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_logRR =~ 1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_logRR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -64,8 +64,6 @@ Param_spRR <- R6Class( private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { - - training_task <- self$observed_likelihood$training_task if (is.null(tmle_task)) { tmle_task <- training_task @@ -82,12 +80,12 @@ Param_spRR <- R6Class( Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] g <- self$observed_likelihood$get_likelihood(tmle_task, "A", fold_number) - g1 <- ifelse(A==1, g, 1-g) - g0 <- 1-g1 - #Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) - #Q0 <- Q_packed[[1]] - #Q1 <- Q_packed[[2]] - #Q <- Q_packed[[3]] + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 + # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + # Q0 <- Q_packed[[1]] + # Q1 <- Q_packed[[2]] + # Q <- Q_packed[[3]] Q <- as.vector(self$observed_likelihood$get_likelihood(tmle_task, "Y", fold_number)) Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task0, "Y", fold_number)) Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihood(cf_task1, "Y", fold_number)) @@ -96,24 +94,23 @@ Param_spRR <- R6Class( Q0 <- pmax(Q0, 0.005) Q1 <- pmax(Q1, 0.005) - RR <- as.vector(Q1/Q0) + RR <- as.vector(Q1 / Q0) gradM <- V - mstar <- RR + (1-A)*1 - num <- gradM * ( RR * g1) + mstar <- RR + (1 - A) * 1 + num <- gradM * (RR * g1) denom <- RR * g1 + g0 - hstar <- - num/denom - H <- as.matrix(A*gradM + hstar) + hstar <- -num / denom + H <- as.matrix(A * gradM + hstar) # Store EIF component EIF_Y <- NULL - if(is_training_task) { - scale <- apply(V,2, function(v) { - apply(self$weights*V*v*g1*g0*RR/(g1*RR + g0)^2 *(Y-Q) + self$weights*H*(A*v*Q),2,mean) + if (is_training_task) { + scale <- apply(V, 2, function(v) { + apply(self$weights * V * v * g1 * g0 * RR / (g1 * RR + g0)^2 * (Y - Q) + self$weights * H * (A * v * Q), 2, mean) }) scaleinv <- solve(scale) - EIF_Y <- as.matrix(self$weights * (H%*% scaleinv) * (Y-Q)) - + EIF_Y <- as.matrix(self$weights * (H %*% scaleinv) * (Y - Q)) } return(list(Y = H, EIF = list(Y = EIF_Y))) @@ -135,8 +132,8 @@ Param_spRR <- R6Class( Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) - Qtest <- ifelse(A==1, Q1, Q0) - if(!all(Qtest-Q==0)) { + Qtest <- ifelse(A == 1, Q1, Q0) + if (!all(Qtest - Q == 0)) { stop("Q and Q1,Q0 dont match") } # Q_packed <- sl3::unpack_predictions(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) @@ -148,7 +145,7 @@ Param_spRR <- R6Class( Q1 <- pmax(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logRR, Q1, Q0, family = poisson(), weights = weights) V <- model.matrix(self$formula_logRR, as.data.frame(W)) - RR <- as.vector(exp(V%*%beta)) + RR <- as.vector(exp(V %*% beta)) IC <- as.matrix(EIF) @@ -176,7 +173,7 @@ Param_spRR <- R6Class( update_nodes = function() { return(c(self$outcome_node)) }, - formula_logRR = function(){ + formula_logRR = function() { return(private$.formula_logRR) } ), diff --git a/R/helpers_point_treatment.R b/R/helpers_point_treatment.R index 72f2d0b4..02ab6814 100644 --- a/R/helpers_point_treatment.R +++ b/R/helpers_point_treatment.R @@ -17,7 +17,7 @@ point_tx_npsem <- function(node_list, variable_types = NULL, scale_outcome = TRU define_node("A", node_list$A, c("W"), variable_type = variable_types$A), define_node("Y", node_list$Y, c("A", "W"), variable_type = variable_types$Y, scale = scale_outcome) ) - if(include_variance_node) { + if (include_variance_node) { npsem$var_Y <- define_node("var_Y", node_list$Y, c("A", "W"), variable_type = variable_types$var_Y, scale = FALSE) } @@ -26,7 +26,7 @@ point_tx_npsem <- function(node_list, variable_types = NULL, scale_outcome = TRU #' @export #' @rdname point_tx -point_tx_task <- function(data, node_list, variable_types = NULL, scale_outcome = TRUE, include_variance_node = FALSE, ...) { +point_tx_task <- function(data, node_list, variable_types = NULL, scale_outcome = TRUE, include_variance_node = FALSE, ...) { setDT(data) npsem <- point_tx_npsem(node_list, variable_types, scale_outcome, include_variance_node) @@ -87,29 +87,29 @@ point_tx_likelihood <- function(tmle_task, learner_list) { likelihood <- likelihood_def$train(tmle_task) # If conditional variance needs to be estimated, do so. - if("var_Y" %in% names(tmle_task$npsem)) { - if(is.null(learner_list[["var_Y"]])) { + if ("var_Y" %in% names(tmle_task$npsem)) { + if (is.null(learner_list[["var_Y"]])) { learner_list[["var_Y"]] <- Lrnr_glmnet$new(family = "poisson") warning("Node var_Y is in npsem but no learner is provided in `learner_list`. Defaulting to glmnet with `poisson` family.") } - if(tmle_task$npsem[["Y"]]$variable_type$type == "binomial") { + if (tmle_task$npsem[["Y"]]$variable_type$type == "binomial") { mean_fun <- function(task, likelihood, tmle_task) { - EY <- sl3::unpack_predictions(likelihood$get_likelihood(tmle_task, "Y")) + EY <- sl3::unpack_predictions(likelihood$get_likelihood(tmle_task, "Y")) EY <- EY[, ncol(EY)] - return(EY * (1-EY)) + return(EY * (1 - EY)) } - LF_var_Y <- LF_known$new("var_Y", mean_fun = mean_fun , base_likelihood = likelihood, type = "mean") + LF_var_Y <- LF_known$new("var_Y", mean_fun = mean_fun, base_likelihood = likelihood, type = "mean") } else { task_generator <- function(tmle_task, base_likelihood) { EY <- sl3::unpack_predictions(base_likelihood$get_likelihood(tmle_task, "Y")) EY <- EY[, ncol(EY)] Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] - outcome <- (Y-EY)^2 + outcome <- (Y - EY)^2 task <- tmle_task$get_regression_task("Y") column_names <- task$add_columns(data.table("var_Y" = outcome)) - task <- task$next_in_chain(outcome = "var_Y", column_names = column_names ) + task <- task$next_in_chain(outcome = "var_Y", column_names = column_names) } - LF_var_Y <- LF_derived$new("var_Y", learner_list[["var_Y"]], likelihood, task_generator = task_generator , type = "mean") + LF_var_Y <- LF_derived$new("var_Y", learner_list[["var_Y"]], likelihood, task_generator = task_generator, type = "mean") } likelihood$add_factors(LF_var_Y) } diff --git a/R/helpers_semiparametric.R b/R/helpers_semiparametric.R index 60a8abcc..843a664a 100644 --- a/R/helpers_semiparametric.R +++ b/R/helpers_semiparametric.R @@ -1,28 +1,19 @@ get_beta <- function(W, A, formula, Q1, Q0, family, weights = NULL) { W <- as.matrix(W) - if(is.null(weights)) { + if (is.null(weights)) { weights <- rep(1, nrow(W)) } V <- model.matrix(formula, as.data.frame(W)) - Q <- ifelse(A==1, Q1, Q0) - beta <- suppressWarnings(coef(glm.fit(A*V, Q, offset = family$linkfun(Q0), family = family, intercept = F, weights = weights))) + Q <- ifelse(A == 1, Q1, Q0) + beta <- suppressWarnings(coef(glm.fit(A * V, Q, offset = family$linkfun(Q0), family = family, intercept = F, weights = weights))) return(beta) } -project_onto_model <- function(W, A, formula, Q1, Q0, family, weights = NULL) { +project_onto_model <- function(W, A, formula, Q1, Q0, family, weights = NULL) { beta <- get_beta(W, A, formula, Q1, Q0, family, weights) V <- model.matrix(formula, as.data.frame(W)) Q1 <- family$linkinv(family$linkfun(Q0) + V %*% beta) - Q <- ifelse(A==1, Q1, Q0) - return(cbind(Q0,Q1, Q)) + Q <- ifelse(A == 1, Q1, Q0) + return(cbind(Q0, Q1, Q)) } - - - - - - - - - diff --git a/R/submodels.R b/R/submodels.R index 7d72ca12..d257d3a2 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -24,10 +24,10 @@ generate_submodel_from_family <- function(family) { #' #' @export # -submodel_logistic_switch <- function(eps, offset, X, observed) { - offset <- ifelse(observed==1, offset, 1-offset) +submodel_logistic_switch <- function(eps, offset, X, observed) { + offset <- ifelse(observed == 1, offset, 1 - offset) output <- stats::plogis(stats::qlogis(offset) + X %*% eps) - output <- ifelse(observed==1, output, 1-output) + output <- ifelse(observed == 1, output, 1 - output) } #' Log likelihood loss for binary variables @@ -37,17 +37,17 @@ submodel_logistic_switch <- function(eps, offset, X, observed) { #' @param weights ... #' @param v ... #' @export -loss_function_loglik_binomial = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { +loss_function_loglik_binomial <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) - if(!is.null(weights)) { + if (!is.null(weights)) { loss <- weights * loss } return(loss) } #' @export -loss_function_loglik = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { +loss_function_loglik <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { loss <- -1 * log(estimate) - if(!is.null(weights)) { + if (!is.null(weights)) { loss <- weights * loss } return(loss) @@ -70,9 +70,9 @@ submodel_linear <- generate_submodel_from_family(gaussian()) #' @param weights ... #' @param likelihood ... #' @export -loss_function_least_squares = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { +loss_function_least_squares <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { loss <- (observed - estimate)^2 - if(!is.null(weights)) { + if (!is.null(weights)) { loss <- weights * loss } return(loss) @@ -88,7 +88,7 @@ loss_function_least_squares = function(estimate, observed, weights = NULL, like #' #' @export # -submodel_exp <- generate_submodel_from_family(poisson()) +submodel_exp <- generate_submodel_from_family(poisson()) #' Poisson/log-linear loss for nonnegative variables #' @@ -97,9 +97,9 @@ submodel_exp <- generate_submodel_from_family(poisson()) #' @param weights ... #' @param likelihood ... #' @export -loss_function_poisson = function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { - loss <- estimate - observed * log(estimate) - if(!is.null(weights)) { +loss_function_poisson <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { + loss <- estimate - observed * log(estimate) + if (!is.null(weights)) { loss <- weights * loss } return(loss) @@ -109,17 +109,17 @@ loss_function_poisson = function(estimate, observed, weights = NULL, likelihood #' @param family ... #' @export generate_loss_function_from_family <- function(family) { - if(!is.character(family)) { + if (!is.character(family)) { family <- family$family } - if(!(family %in% c("poisson", "gaussian", "binomial"))){ + if (!(family %in% c("poisson", "gaussian", "binomial"))) { stop("Unsupported family object.") } - if(family == "poisson"){ + if (family == "poisson") { return(loss_function_poisson) - } else if(family == "gaussian"){ + } else if (family == "gaussian") { return(loss_function_least_squares) - } else if(family == "binomial"){ + } else if (family == "binomial") { return(loss_function_loglik_binomial) } } @@ -128,15 +128,15 @@ generate_loss_function_from_family <- function(family) { #' Main maker of submodel specs. #' @param name ... #' @export -make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, loss_function = NULL) { - if(is.null(submodel_function) && inherits(submodel_function, "family")) { +make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, loss_function = NULL) { + if (is.null(submodel_function) && inherits(submodel_function, "family")) { submodel_function <- generate_submodel_from_family(submodel_function) - } else if(is.null(submodel_function) && !is.null(family)) { + } else if (is.null(submodel_function) && !is.null(family)) { submodel_function <- generate_submodel_from_family(family) } - if(is.null(loss_function) && inherits(loss_function, "family")) { + if (is.null(loss_function) && inherits(loss_function, "family")) { generate_loss_function_from_family(loss_function) - } else if(is.null(loss_function) && !is.null(family)) { + } else if (is.null(loss_function) && !is.null(family)) { loss_function <- generate_loss_function_from_family(family) } return(list(name = name, family = family, submodel_function = submodel_function, loss_function = loss_function)) @@ -147,22 +147,27 @@ make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, l #' @export get_submodel_spec <- function(name) { output <- NULL - tryCatch({ - if(inherits(name, "family")) { - family <- name - } else { - split_names <- unlist(strsplit(name, "_")) - if(length(split_names)==2) { - family <- get(split_names[1])(link = split_names[2]) + tryCatch( + { + if (inherits(name, "family")) { + family <- name } else { - family <- get(split_names[1])() + split_names <- unlist(strsplit(name, "_")) + if (length(split_names) == 2) { + family <- get(split_names[1])(link = split_names[2]) + } else { + family <- get(split_names[1])() + } } + output <- make_submodel_spec(name, family) + }, + error = function(...) { + try({ + output <<- get(paste0("submodel_spec_", name)) + }) } - output <- make_submodel_spec(name, family) - }, error = function(...) { - try({output <<- get(paste0("submodel_spec_",name))}) - }) - if(is.null(output)) { + ) + if (is.null(output)) { stop(paste0("Argument name was not a valid family nor was `submodel_spec_", name, "` found in the environment.")) } return(output) @@ -170,5 +175,6 @@ get_submodel_spec <- function(name) { #' Submodel for binary outcomes where "initial" is a likelihood and not a conditional mean (e.g. for Param_ATC and Param_ATT for updating node `A`). #' @export -submodel_spec_logistic_switch <- list(name = "logistic_switch", family = function(){stop("Does not support family-based updating. Please use optim instead.")}, submodel_function = submodel_logistic_switch, loss_function = loss_function_loglik) - +submodel_spec_logistic_switch <- list(name = "logistic_switch", family = function() { + stop("Does not support family-based updating. Please use optim instead.") +}, submodel_function = submodel_logistic_switch, loss_function = loss_function_loglik) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 53981f6d..9e3fc59f 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -11,14 +11,15 @@ tmle3_Spec_npCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "CATT", "OR", "RR"), treatment_level = 1, control_level =0, + initialize = function(formula, estimand = c("CATE", "CATT", "OR", "RR"), treatment_level = 1, control_level = 0, likelihood_override = NULL, variable_types = NULL, ...) { estimand <- match.arg(estimand) - private$.options <- list(estimand = estimand, formula = formula, - treatment_level = treatment_level, control_level = control_level, - likelihood_override = likelihood_override, - variable_types = variable_types, ... + private$.options <- list( + estimand = estimand, formula = formula, + treatment_level = treatment_level, control_level = control_level, + likelihood_override = likelihood_override, + variable_types = variable_types, ... ) }, make_tmle_task = function(data, node_list, ...) { @@ -29,8 +30,8 @@ tmle3_Spec_npCausalGLM <- R6Class( return(tmle_task) }, - make_initial_likelihood = function(tmle_task, learner_list = NULL ) { - #Wrap baseline learner in semiparametric learner + make_initial_likelihood = function(tmle_task, learner_list = NULL) { + # Wrap baseline learner in semiparametric learner # produce trained likelihood when likelihood_def provided if (!is.null(self$options$likelihood_override)) { @@ -41,13 +42,13 @@ tmle3_Spec_npCausalGLM <- R6Class( return(likelihood) }, - make_updater = function(convergence_type = "sample_size", verbose = F,...) { - if(self$options$estimand == "CATE" || self$options$estimand == "CATT"){ - updater <- tmle3_Update$new(maxit=100,one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) - } else if (self$options$estimand == "OR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) - } else if (self$options$estimand == "RR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + make_updater = function(convergence_type = "sample_size", verbose = F, ...) { + if (self$options$estimand == "CATE" || self$options$estimand == "CATT") { + updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) + } else if (self$options$estimand == "OR") { + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) + } else if (self$options$estimand == "RR") { + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) } return(updater) }, @@ -66,13 +67,13 @@ tmle3_Spec_npCausalGLM <- R6Class( treatment <- define_lf(LF_static, "A", value = treatment_value) control <- define_lf(LF_static, "A", value = control_value) formula <- self$options$formula - if(self$options$estimand == "CATE"){ - param <- Param_npCATE$new(targeted_likelihood,formula, treatment, control) - } else if(self$options$estimand == "CATT"){ - param <- Param_npCATT$new(targeted_likelihood,formula, treatment, control) - } else if (self$options$estimand == "OR"){ - param <- Param_npOR$new(targeted_likelihood,formula, treatment, control) - } else if (self$options$estimand == "RR"){ + if (self$options$estimand == "CATE") { + param <- Param_npCATE$new(targeted_likelihood, formula, treatment, control) + } else if (self$options$estimand == "CATT") { + param <- Param_npCATT$new(targeted_likelihood, formula, treatment, control) + } else if (self$options$estimand == "OR") { + param <- Param_npOR$new(targeted_likelihood, formula, treatment, control) + } else if (self$options$estimand == "RR") { param <- Param_npRR$new(targeted_likelihood, formula, treatment, control) } return(list(param)) diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index b51a0eca..3d6cd588 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -56,7 +56,7 @@ tmle3_Update <- R6Class( fluctuation_type = c("standard", "weighted"), optim_delta_epsilon = TRUE, use_best = FALSE, - verbose = FALSE, bounds = list(Y = 1e-5, A=0.005)) { + verbose = FALSE, bounds = list(Y = 1e-5, A = 0.005)) { private$.maxit <- maxit private$.cvtmle <- cvtmle private$.one_dimensional <- one_dimensional @@ -120,9 +120,9 @@ tmle3_Update <- R6Class( submodel_name <- submodel_spec$name # Check compatibility of tmle_params with submodel lapply(self$tmle_params, function(tmle_param) { - if(update_node %in% tmle_param$update_nodes ) { - if(!(tmle_param$supports_submodel(submodel_name, update_node))){ - stop(paste0("Incompatible parameter-specific submodel specs for update node: Parameter `", tmle_param$name, "`` does not support the submodel `", submodel_name, "` for update node `", update_node, "`.")) + if (update_node %in% tmle_param$update_nodes) { + if (!(tmle_param$supports_submodel(submodel_name, update_node))) { + stop(paste0("Incompatible parameter-specific submodel specs for update node: Parameter `", tmle_param$name, "`` does not support the submodel `", submodel_name, "` for update node `", update_node, "`.")) } } }) @@ -137,24 +137,27 @@ tmle3_Update <- R6Class( if (self$one_dimensional) { EIF_components <- NULL # If EIF components are provided use those instead of the full EIF - tryCatch({ - EIF_components <-lapply(clever_covariates, function(item) { - item$EIF[[update_node]] - }) - EIF_components <- do.call(cbind, EIF_components) - - ED <- colMeans(EIF_components) - - EDnormed <- ED / norm(ED, type = "2") - if(length(EIF_components) ==0 || ncol(EIF_components) != ncol(covariates_dt)) { - stop("Not all params provide EIF components") - } - }, error = function(...){}) - if(is.null(EIF_components)) { + tryCatch( + { + EIF_components <- lapply(clever_covariates, function(item) { + item$EIF[[update_node]] + }) + EIF_components <- do.call(cbind, EIF_components) + + ED <- colMeans(EIF_components) + + EDnormed <- ED / norm(ED, type = "2") + if (length(EIF_components) == 0 || ncol(EIF_components) != ncol(covariates_dt)) { + stop("Not all params provide EIF components") + } + }, + error = function(...) {} + ) + if (is.null(EIF_components)) { ED <- ED_from_estimates(self$current_estimates) EDnormed <- ED / norm(ED, type = "2") } - #covariates_dt <- self$collapse_covariates(self$current_estimates, covariates_dt) + # covariates_dt <- self$collapse_covariates(self$current_estimates, covariates_dt) } else { EDnormed <- NULL } @@ -213,9 +216,9 @@ tmle3_Update <- R6Class( # Extract submodel spec info EDnormed <- submodel_data$EDnormed - if(!is.null(EDnormed)) { + if (!is.null(EDnormed)) { # Collapse clever covariates - submodel_data$H <- as.matrix(submodel_data$H) %*% EDnormed + submodel_data$H <- as.matrix(submodel_data$H) %*% EDnormed } else { EDnormed <- 1 } @@ -242,7 +245,7 @@ tmle3_Update <- R6Class( risk <- function(epsilon) { submodel_estimate <- self$apply_submodel(submodel, submodel_data, epsilon) - loss <- loss_function(submodel_estimate, submodel_data$observed, weights = submodel_data$weights, likelihood = training_likelihood, tmle_task = training_task, fold_number = training_fold) + loss <- loss_function(submodel_estimate, submodel_data$observed, weights = submodel_data$weights, likelihood = training_likelihood, tmle_task = training_task, fold_number = training_fold) mean(loss) } @@ -271,11 +274,7 @@ tmle3_Update <- R6Class( cat(sprintf("risk_change: %e ", risk_val - risk_zero)) } } else { - if (self$fluctuation_type == "standard") { - - - suppressWarnings({ submodel_fit <- glm(observed ~ H - 1, submodel_data, offset = family_object$linkfun(submodel_data$initial), @@ -284,7 +283,6 @@ tmle3_Update <- R6Class( start = rep(0, ncol(submodel_data$H)) ) }) - } else if (self$fluctuation_type == "weighted") { if (self$one_dimensional) { suppressWarnings({ @@ -368,7 +366,7 @@ tmle3_Update <- R6Class( } else if (self$convergence_type == "sample_size") { ED_threshold <- 1 / n } else if (self$convergence_type == "exact") { - ED_threshold <- min(1/n,1e-8) + ED_threshold <- min(1 / n, 1e-8) } # get |P_n D*| of any number of parameter estimates @@ -447,9 +445,9 @@ tmle3_Update <- R6Class( }, bounds = function(node) { bounds <- private$.bounds - if(is.numeric(bounds)) { + if (is.numeric(bounds)) { return(bounds) - } else if(is.null(bounds[[node]])) { + } else if (is.null(bounds[[node]])) { bounds <- 0.005 } else { bounds <- bounds[[node]] diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 26f483be..bce54d77 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -11,12 +11,13 @@ tmle3_Spec_spCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "OR", "RR"), treatment_level = 1, control_level =0, + initialize = function(formula, estimand = c("CATE", "OR", "RR"), treatment_level = 1, control_level = 0, likelihood_override = NULL, variable_types = NULL, ...) { estimand <- match.arg(estimand) - private$.options <- list(estimand = estimand, formula = formula, - treatment_level = treatment_level, control_level = control_level, + private$.options <- list( + estimand = estimand, formula = formula, + treatment_level = treatment_level, control_level = control_level, likelihood_override = likelihood_override, variable_types = variable_types, ... ) @@ -24,7 +25,7 @@ tmle3_Spec_spCausalGLM <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types include_variance_node <- self$options$estimand == "CATE" - if(self$options$estimand == "RR") { + if (self$options$estimand == "RR") { variable_types <- list(Y = variable_type("continuous")) } tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) @@ -32,9 +33,9 @@ tmle3_Spec_spCausalGLM <- R6Class( return(tmle_task) }, make_initial_likelihood = function(tmle_task, learner_list = NULL, append_interaction_matrix = TRUE, wrap_in_Lrnr_glm_sp = TRUE) { - #Wrap baseline learner in semiparametric learner - if(wrap_in_Lrnr_glm_sp) { - learner_list[["Y"]] <- Lrnr_glm_semiparametric$new(formula_sp = self$options$formula, family = self$family, interaction_variable = "A", lrnr_baseline = learner_list[["Y"]] , append_interaction_matrix = append_interaction_matrix) + # Wrap baseline learner in semiparametric learner + if (wrap_in_Lrnr_glm_sp) { + learner_list[["Y"]] <- Lrnr_glm_semiparametric$new(formula_sp = self$options$formula, family = self$family, interaction_variable = "A", lrnr_baseline = learner_list[["Y"]], append_interaction_matrix = append_interaction_matrix) } # produce trained likelihood when likelihood_def provided if (!is.null(self$options$likelihood_override)) { @@ -45,13 +46,13 @@ tmle3_Spec_spCausalGLM <- R6Class( return(likelihood) }, - make_updater = function(convergence_type = "sample_size", verbose = F,...) { - if(self$options$estimand == "CATE"){ - updater <- tmle3_Update$new(maxit=100,one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) - } else if (self$options$estimand == "OR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose,delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) - } else if (self$options$estimand == "RR"){ - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + make_updater = function(convergence_type = "sample_size", verbose = F, ...) { + if (self$options$estimand == "CATE") { + updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) + } else if (self$options$estimand == "OR") { + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) + } else if (self$options$estimand == "RR") { + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) } return(updater) }, @@ -70,11 +71,11 @@ tmle3_Spec_spCausalGLM <- R6Class( treatment <- define_lf(LF_static, "A", value = treatment_value) control <- define_lf(LF_static, "A", value = control_value) formula <- self$options$formula - if(self$options$estimand == "CATE"){ - param <- Param_spCATE$new(targeted_likelihood,formula, treatment, control) - } else if (self$options$estimand == "OR"){ - param <- Param_spOR$new(targeted_likelihood,formula, treatment, control) - } else if (self$options$estimand == "RR"){ + if (self$options$estimand == "CATE") { + param <- Param_spCATE$new(targeted_likelihood, formula, treatment, control) + } else if (self$options$estimand == "OR") { + param <- Param_spOR$new(targeted_likelihood, formula, treatment, control) + } else if (self$options$estimand == "RR") { param <- Param_spRR$new(targeted_likelihood, formula, treatment, control) } return(list(param)) diff --git a/tests/testthat/test-ATE.R b/tests/testthat/test-ATE.R index 4f9f0fde..86aaae9b 100644 --- a/tests/testthat/test-ATE.R +++ b/tests/testthat/test-ATE.R @@ -60,7 +60,7 @@ tmle_params <- tmle_spec$make_params(tmle_task, targeted_likelihood) updater$tmle_params <- tmle_params ate <- tmle_params[[1]] -#print(ate$get_submodel_spec("Y")) +# print(ate$get_submodel_spec("Y")) cf_task1 <- ate$cf_likelihood_treatment$cf_tasks[[1]] cf_task0 <- ate$cf_likelihood_control$cf_tasks[[1]] diff --git a/tests/testthat/test-spRR.R b/tests/testthat/test-spRR.R index ec85432a..46beb676 100644 --- a/tests/testthat/test-spRR.R +++ b/tests/testthat/test-spRR.R @@ -5,23 +5,22 @@ context("spRR test") passes <- c() -for(i in 1:1){ +for (i in 1:1) { print(i) n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) - Y <- rpois(n, exp(A + A*W + W)) - data <- data.table(W,A,Y) + Y <- rpois(n, exp(A + A * W + W)) + data <- data.table(W, A, Y) data lrnr_Y0W <- Lrnr_gam$new(family = poisson()) lrnr_A <- Lrnr_gam$new() - node_list <- list (W = "W", A = "A", Y= "Y") - learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) - spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "RR") - out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + node_list <- list(W = "W", A = "A", Y = "Y") + learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) + spec_spCATE <- tmle3_Spec_spCausalGLM$new(~ 1 + W, "RR") + out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary - passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) + passes <- cbind(passes, out$lower <= 1 & out$upper >= 1) print(rowMeans(passes)) - } diff --git a/tests/testthat/test-spnpCATECATT.R b/tests/testthat/test-spnpCATECATT.R index df961e85..ef215d4f 100644 --- a/tests/testthat/test-spnpCATECATT.R +++ b/tests/testthat/test-spnpCATECATT.R @@ -5,37 +5,37 @@ passes <- c() passes1 <- c() passes2 <- c() -for(i in 1:1){ +for (i in 1:1) { print(i) n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) - Y <- rnorm(n, mean = A+W, sd = 0.3) - data <- data.table(W,A,Y) - lrnr_Y0W <- Lrnr_gam$new() + Y <- rnorm(n, mean = A + W, sd = 0.3) + data <- data.table(W, A, Y) + lrnr_Y0W <- Lrnr_gam$new() lrnr_A <- Lrnr_gam$new() - node_list <- list (W = "W", A = "A", Y= "Y") - learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) + node_list <- list(W = "W", A = "A", Y = "Y") + learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) # spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") # out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATE") - suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary - passes <- c(passes , out$lower <= 1 & out$upper >= 1) + passes <- c(passes, out$lower <= 1 & out$upper >= 1) spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "CATT") - suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary - passes1 <- c(passes1 , out$lower <= 1 & out$upper >= 1) + passes1 <- c(passes1, out$lower <= 1 & out$upper >= 1) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") - suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary - passes2 <- c(passes2 , out$lower <= 1 & out$upper >= 1) + passes2 <- c(passes2, out$lower <= 1 & out$upper >= 1) print(mean(passes)) print(mean(passes1)) diff --git a/tests/testthat/test-spnpOR.R b/tests/testthat/test-spnpOR.R index c8d480e2..fca7f7ed 100644 --- a/tests/testthat/test-spnpOR.R +++ b/tests/testthat/test-spnpOR.R @@ -2,29 +2,29 @@ context("spnpOR test") passes <- c() passes1 <- c() -for(i in 1:1){ +for (i in 1:1) { print(i) library(sl3) n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(0)) - Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) - quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) - data <- data.table(W,A,Y) + Y <- rbinom(n, size = 1, prob = plogis(A + W + A * W)) + quantile(plogis(1 + W) * (1 - plogis(1 + W)) / (plogis(W) * (1 - plogis(W)))) + data <- data.table(W, A, Y) lrnr_Y0W <- Lrnr_gam$new() lrnr_A <- Lrnr_gam$new() - node_list <- list (W = "W", A = "A", Y= "Y") - learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) - spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") + node_list <- list(W = "W", A = "A", Y = "Y") + learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) + spec_spCATE <- tmle3_Spec_spCausalGLM$new(~ 1 + W, "OR") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary - passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) + passes <- cbind(passes, out$lower <= 1 & out$upper >= 1) - spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "OR") - suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + spec_spCATE <- tmle3_Spec_npCausalGLM$new(~ 1 + W, "OR") + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary - passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) + passes1 <- cbind(passes1, out$lower <= 1 & out$upper >= 1) print(rowMeans(passes)) print(rowMeans(passes1)) From e97a39f89f0e04d29850a854f8821dfa30893c7d Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 12:17:59 -0700 Subject: [PATCH 25/65] ran make style --- R/Lrnr_glm_semiparametric.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index 3a4fa81c..5c6d0e47 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -41,7 +41,7 @@ Lrnr_glm_semiparametric <- R6Class( classname = "Lrnr_glm_semiparametric", inherit = Lrnr_base, portable = TRUE, class = TRUE, public = list( - initialize = function(formula_sp, lrnr_baseline, interaction_variable = "A", family = NULL, append_interaction_matrix = TRUE, return_matrix_predictions = F, ...) { + initialize = function(formula_sp, lrnr_baseline, interaction_variable = "A", family = NULL, append_interaction_matrix = TRUE, return_matrix_predictions = FALSE, ...) { params <- args_to_list() super$initialize(params = params, ...) } From 667e24c0e9122a69c5537ad5aa4bc846b69c96b4 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 12:55:28 -0700 Subject: [PATCH 26/65] wait --- tests/testthat/test-spRR.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-spRR.R b/tests/testthat/test-spRR.R index 46beb676..1edb1112 100644 --- a/tests/testthat/test-spRR.R +++ b/tests/testthat/test-spRR.R @@ -13,8 +13,8 @@ for (i in 1:1) { Y <- rpois(n, exp(A + A * W + W)) data <- data.table(W, A, Y) data - lrnr_Y0W <- Lrnr_gam$new(family = poisson()) - lrnr_A <- Lrnr_gam$new() + lrnr_Y0W <- Lrnr_glm$new(family = poisson()) + lrnr_A <- Lrnr_glm$new() node_list <- list(W = "W", A = "A", Y = "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) From 2d3aa6e3669cb6d6de855afc587a6fda1c03fed8 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 13:09:22 -0700 Subject: [PATCH 27/65] fix bug --- tests/testthat/test-spnpCATECATT.R | 4 ++-- tests/testthat/test-spnpOR.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-spnpCATECATT.R b/tests/testthat/test-spnpCATECATT.R index ef215d4f..90524925 100644 --- a/tests/testthat/test-spnpCATECATT.R +++ b/tests/testthat/test-spnpCATECATT.R @@ -13,8 +13,8 @@ for (i in 1:1) { A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rnorm(n, mean = A + W, sd = 0.3) data <- data.table(W, A, Y) - lrnr_Y0W <- Lrnr_gam$new() - lrnr_A <- Lrnr_gam$new() + lrnr_Y0W <- Lrnr_glm$new() + lrnr_A <- Lrnr_glm$new() node_list <- list(W = "W", A = "A", Y = "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) diff --git a/tests/testthat/test-spnpOR.R b/tests/testthat/test-spnpOR.R index fca7f7ed..ec60ddd9 100644 --- a/tests/testthat/test-spnpOR.R +++ b/tests/testthat/test-spnpOR.R @@ -11,8 +11,8 @@ for (i in 1:1) { Y <- rbinom(n, size = 1, prob = plogis(A + W + A * W)) quantile(plogis(1 + W) * (1 - plogis(1 + W)) / (plogis(W) * (1 - plogis(W)))) data <- data.table(W, A, Y) - lrnr_Y0W <- Lrnr_gam$new() - lrnr_A <- Lrnr_gam$new() + lrnr_Y0W <- Lrnr_glm$new() + lrnr_A <- Lrnr_glm$new() node_list <- list(W = "W", A = "A", Y = "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~ 1 + W, "OR") From e34939707fb2e4023e327f5491ad14ad9d86044e Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 14:24:40 -0700 Subject: [PATCH 28/65] format --- NAMESPACE | 1 - R/Lrnr_glm_semiparametric.R | 110 +++++++++++++++++++++++++++----- man/Lrnr_glm_semiparametric.Rd | 111 ++++++++++++++++++++++++++++----- vignettes/testing.Rmd | 83 ++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 34 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 82e51ef0..58ad6ccf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -121,7 +121,6 @@ importFrom(sl3,make_learner) importFrom(sl3,sl3_Task) importFrom(stats,aggregate) importFrom(stats,cov) -importFrom(stats,family) importFrom(stats,glm) importFrom(stats,median) importFrom(stats,p.adjust) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index 5c6d0e47..59eaa648 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -2,41 +2,117 @@ #' #' This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and #' \code{\link[stats]{glm.fit}}. It supports models of the form `linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)` where `A` is a binary or continuous interaction variable, -#' and `f(W)` is a user-specified parametric function (e.g. `f(W) = model.matrix(formula_sp, W)`). The baseline function `E[Y|A=0,W]` is fit using a user-specified \code{sl3}-Learner (possibly pooled over values of `A` and then projected onto the semiparametric model). +#' and `f(W)` is a user-specified parametric function (e.g. `f(W) = model.matrix(formula_sp, W)`). The baseline function `E[Y|A=0,W]` is fit using a user-specified Learner (possibly pooled over values of `A` and then projected onto the semiparametric model). #' #' @docType class #' #' @importFrom R6 R6Class -#' @importFrom stats glm predict family #' #' @export #' #' @keywords data #' -#' @return Learner object with methods for training and prediction. See -#' \code{\link{Lrnr_base}} for documentation on learners. +#' @return A learner object inheriting from \code{\link{Lrnr_base}} with +#' methods for training and prediction. For a full list of learner +#' functionality, see the complete documentation of \code{\link{Lrnr_base}}. #' -#' @format \code{\link{R6Class}} object. +#' @format An \code{\link[R6]{R6Class}} object inheriting from +#' \code{\link{Lrnr_base}}. #' #' @family Learners #' #' @section Parameters: #' \describe{ -#' \item{\code{formula_sp}}{ A \code{formula} object specifying the parametric component of the semiparametric model.} -#' \item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component.} -#' \item{\code{interaction_variable}}{A interaction variable to multiply with the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated as the value 1. -#' In many applications, this represents a binary treatment variable `A`.} -#' \item{\code{family}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{gaussian}), partially-linear logistic regression (\code{binomial}), partially-linear relative-risk regression (\code{poisson}) } -#' \item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on `cbind(task$X,A*V)` where `A` is the interaction variable and `V` is the design matrix obtained from \code{formula_sp}. +#' \item{\code{formula_sp}}{ A \code{\link{formula}} object specifying the parametric component of the semiparametric model.} +#' \item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component. This can be pooled or unpooled by specifying \code{return_matrix_predictions}} +#' \item{\code{interaction_variable = "A"}}{A interaction variable name (that can be found in `training_task$data`) to multiply by the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated identically `1`. +#' In many applications, this will be the name of a binary treatment variable (e.g. `A`).} +#' \item{\code{family = NULL}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{\link{gaussian}), partially-linear logistic regression (\code{\link{binomial}), partially-linear log-linear regression (\code{\link{poisson}) } +#' \item{\code{append_interaction_matrix = TRUE}}{Whether \code{lrnr_baseline} should be fit on `cbind(task$X,A*V)` where `A` is the interaction variable and `V` is the design matrix obtained from \code{formula_sp}. #' Note, if `append_interaction_matrix = TRUE`, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. #' If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. -#' Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A` = 0. +#' Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A = 0`. #' In the binary case, setting `append_interaction_matrix = TRUE` allows one to pool the learning across treatment arms and allows additive models to perform well. } -#' \item{\code{return_matrix_predictions}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`.} -#' +#' \item{\code{return_matrix_predictions = FALSE}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`.} +#' \item{\code{...}}{Not used.} #' } #' -# +#' @examples +#' library(glmnet) +#' n <- 200 +#' W <- runif(n, -1, 1) +#' A <- rbinom(n, 1, plogis(W)) +#' Y_continuous <- rnorm(n, mean = A + W, sd = 0.3) +#' Y_binary <- rbinom(n, 1, plogis(A + W)) +#' Y_count <- rpois(n, exp(A + W)) +#' data <- data.table(W, A, Y_continuous, Y_binary, Y_count) +#' +#' # Make tasks +#' task_continuous <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_continuous") +#' task_binary <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_binary") +#' task_count <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_count", outcome_type = "continuous") +#' +#' formula_sp <- ~ 1 + W +#' +#' # fit partially-linear least-squares regression with `append_interaction_matrix = TRUE` +#' set.seed(100) +#' lrnr_baseline <- Lrnr_glmnet$new() +#' family <- gaussian() +#' lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +#' lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) +#' preds <- lrnr_glm_sp_gaussian$predict(task_continuous) +#' beta <- lrnr_glm_sp_gaussian$fit_object$coefficients +#' ## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +#' V <- model.matrix(formula_sp, task_continuous$data) +#' X <- cbind(task_continuous$data[["W"]], task_continuous$data[["A"]] * V) +#' X0 <- cbind(task_continuous$data[["W"]], 0 * V) +#' colnames(X) <- c("W", "A", "A*W") +#' Y <- task_continuous$Y +#' set.seed(100) +#' beta_equiv <- coef(cv.glmnet(X, Y, family = "gaussian"), s = "lambda.min")[c(3, 4)] +#' ## Actually, the glmnet fit is projected onto the semiparametric model with glm.fit (no effect in this case) +#' print(beta - beta_equiv) +#' +#' # fit partially-linear least-squares regression with `append_interaction_matrix = FALSE` +#' set.seed(100) +#' lrnr_baseline <- Lrnr_glm$new(family = gaussian()) +#' family <- gaussian() +#' lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = FALSE) +#' lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) +#' preds <- lrnr_glm_sp_gaussian$predict(task_continuous) +#' beta <- lrnr_glm_sp_gaussian$fit_object$coefficients +#' ## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +#' ## Subset to baseline treatment arm +#' subset_to <- task_continuous$data[["A"]] == 0 +#' +#' V <- model.matrix(formula_sp, task_continuous$data) +#' X <- cbind(rep(1, n), task_continuous$data[["W"]]) +#' Y <- task_continuous$Y +#' set.seed(100) +#' beta_Y0W <- lrnr_glm_sp_gaussian$fit_object$lrnr_baseline$fit_object$coefficients +#' beta_Y0W_equiv <- coef(glm.fit(X[subset_to, , drop = F], Y[subset_to], family = gaussian())) # Subset to baseline treatment arm +#' EY0 <- X %*% beta_Y0W +#' beta_equiv <- coef(glm.fit(A * V, Y, offset = EY0, family = gaussian())) +#' print(beta_Y0W - beta_Y0W_equiv) +#' print(beta - beta_equiv) +#' +#' # fit partially-linear logistic regression +#' lrnr_baseline <- Lrnr_glmnet$new() +#' family <- binomial() +#' lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +#' lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_binary) +#' preds <- lrnr_glm_sp_binomial$predict(task_binary) +#' beta <- lrnr_glm_sp_binomial$fit_object$coefficients +#' +#' # fit partially-linear log-link (relative-risk) regression +#' lrnr_baseline <- Lrnr_glmnet$new(family = "poisson") # This setting requires that lrnr_baseline predicts nonnegative values. It is recommended to use poisson regression based learners. +#' family <- poisson() +#' lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +#' lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_count) +#' preds <- lrnr_glm_sp_binomial$predict(task_count) +#' beta <- lrnr_glm_sp_binomial$fit_object$coefficients +#' +#' # Lrnr_glm_semiparametric <- R6Class( classname = "Lrnr_glm_semiparametric", inherit = Lrnr_base, portable = TRUE, class = TRUE, @@ -113,7 +189,7 @@ Lrnr_glm_semiparametric <- R6Class( } fit_object <- list( - beta = beta, lrnr_baseline = lrnr_baseline, covariates = covariates, family = family, formula = formula, + coefficients = beta, lrnr_baseline = lrnr_baseline, covariates = covariates, family = family, formula = formula, append_interaction_matrix = append_interaction_matrix, binary = binary, task_baseline = task_baseline ) return(fit_object) @@ -122,7 +198,7 @@ Lrnr_glm_semiparametric <- R6Class( fit_object <- self$fit_object append_interaction_matrix <- fit_object$append_interaction_matrix binary <- fit_object$binary - beta <- fit_object$beta + beta <- fit_object$coefficients lrnr_baseline <- fit_object$lrnr_baseline covariates <- fit_object$covariates family <- fit_object$family diff --git a/man/Lrnr_glm_semiparametric.Rd b/man/Lrnr_glm_semiparametric.Rd index 808fbceb..ec0c378f 100644 --- a/man/Lrnr_glm_semiparametric.Rd +++ b/man/Lrnr_glm_semiparametric.Rd @@ -5,34 +5,113 @@ \alias{Lrnr_glm_semiparametric} \title{Semiparametric Generalized Linear Models} \format{ -\code{\link{R6Class}} object. +An \code{\link[R6]{R6Class}} object inheriting from +\code{\link{Lrnr_base}}. } \value{ -Learner object with methods for training and prediction. See -\code{\link{Lrnr_base}} for documentation on learners. +A learner object inheriting from \code{\link{Lrnr_base}} with +methods for training and prediction. For a full list of learner +functionality, see the complete documentation of \code{\link{Lrnr_base}}. } \description{ This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and \code{\link[stats]{glm.fit}}. It supports models of the form \verb{linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)} where \code{A} is a binary or continuous interaction variable, -and \code{f(W)} is a user-specified parametric function (e.g. \code{f(W) = model.matrix(formula_sp, W)}). The baseline function \verb{E[Y|A=0,W]} is fit using a user-specified \code{sl3}-Learner (possibly pooled over values of \code{A} and then projected onto the semiparametric model). +and \code{f(W)} is a user-specified parametric function (e.g. \code{f(W) = model.matrix(formula_sp, W)}). The baseline function \verb{E[Y|A=0,W]} is fit using a user-specified Learner (possibly pooled over values of \code{A} and then projected onto the semiparametric model). } \section{Parameters}{ \describe{ -\item{\code{formula_sp}}{ A \code{formula} object specifying the parametric component of the semiparametric model.} -\item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component.} -\item{\code{interaction_variable}}{A interaction variable to multiply with the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated as the value 1. -In many applications, this represents a binary treatment variable \code{A}.} -\item{\code{family}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{gaussian}), partially-linear logistic regression (\code{binomial}), partially-linear relative-risk regression (\code{poisson}) } -\item{\code{append_interaction_matrix}}{Whether to \code{lrnr_baseline} should be fit on \code{cbind(task$X,A*V)} where \code{A} is the interaction variable and \code{V} is the design matrix obtained from \code{formula_sp}. -Note, if \code{append_interaction_matrix = TRUE}, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. -If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. -Specifically, if FALSE, \code{lrnr_baseline} is used to estimate \verb{E[Y|A=0,W]} by subsetting to only observations with \code{A} = 0. -In the binary case, setting \code{append_interaction_matrix = TRUE} allows one to pool the learning across treatment arms and allows additive models to perform well. } -\item{\code{return_matrix_predictions}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being \verb{E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]}.} - +\item{\code{formula_sp}}{ A \code{\link{formula}} object specifying the parametric component of the semiparametric model.} +\item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component. This can be pooled or unpooled by specifying \code{return_matrix_predictions}} +\item{\code{interaction_variable = "A"}}{A interaction variable name (that can be found in \code{training_task$data}) to multiply by the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated identically \code{1}. +In many applications, this will be the name of a binary treatment variable (e.g. \code{A}).} +\item{\code{family = NULL}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{\link{gaussian}), partially-linear logistic regression (\code{\link{binomial}), partially-linear log-linear regression (\code{\link{poisson}) } + \item{\code{append_interaction_matrix = TRUE}}{Whether \code{lrnr_baseline} should be fit on `cbind(task$X,A*V)` where `A` is the interaction variable and `V` is the design matrix obtained from \code{formula_sp}. + Note, if `append_interaction_matrix = TRUE`, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. + If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. + Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A = 0`. + In the binary case, setting `append_interaction_matrix = TRUE` allows one to pool the learning across treatment arms and allows additive models to perform well. } + \item{\code{return_matrix_predictions = FALSE}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`.} + \item{\code{...}}{Not used.} } } +\examples{ +library(glmnet) +n <- 200 +W <- runif(n, -1, 1) +A <- rbinom(n, 1, plogis(W)) +Y_continuous <- rnorm(n, mean = A + W, sd = 0.3) +Y_binary <- rbinom(n, 1, plogis(A + W)) +Y_count <- rpois(n, exp(A + W)) +data <- data.table(W, A, Y_continuous, Y_binary, Y_count) + +# Make tasks +task_continuous <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_continuous") +task_binary <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_binary") +task_count <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_count", outcome_type = "continuous") + +formula_sp <- ~ 1 + W + +# fit partially-linear least-squares regression with `append_interaction_matrix = TRUE` +set.seed(100) +lrnr_baseline <- Lrnr_glmnet$new() +family <- gaussian() +lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) +preds <- lrnr_glm_sp_gaussian$predict(task_continuous) +beta <- lrnr_glm_sp_gaussian$fit_object$coefficients +## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +V <- model.matrix(formula_sp, task_continuous$data) +X <- cbind(task_continuous$data[["W"]], task_continuous$data[["A"]] * V) +X0 <- cbind(task_continuous$data[["W"]], 0 * V) +colnames(X) <- c("W", "A", "A*W") +Y <- task_continuous$Y +set.seed(100) +beta_equiv <- coef(cv.glmnet(X, Y, family = "gaussian"), s = "lambda.min")[c(3, 4)] +## Actually, the glmnet fit is projected onto the semiparametric model with glm.fit (no effect in this case) +print(beta - beta_equiv) + +# fit partially-linear least-squares regression with `append_interaction_matrix = FALSE` +set.seed(100) +lrnr_baseline <- Lrnr_glm$new(family = gaussian()) +family <- gaussian() +lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = FALSE) +lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) +preds <- lrnr_glm_sp_gaussian$predict(task_continuous) +beta <- lrnr_glm_sp_gaussian$fit_object$coefficients +## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +## Subset to baseline treatment arm +subset_to <- task_continuous$data[["A"]] == 0 + +V <- model.matrix(formula_sp, task_continuous$data) +X <- cbind(rep(1, n), task_continuous$data[["W"]]) +Y <- task_continuous$Y +set.seed(100) +beta_Y0W <- lrnr_glm_sp_gaussian$fit_object$lrnr_baseline$fit_object$coefficients +beta_Y0W_equiv <- coef(glm.fit(X[subset_to, , drop = F], Y[subset_to], family = gaussian())) # Subset to baseline treatment arm +EY0 <- X \%*\% beta_Y0W +beta_equiv <- coef(glm.fit(A * V, Y, offset = EY0, family = gaussian())) +print(beta_Y0W - beta_Y0W_equiv) +print(beta - beta_equiv) + +# fit partially-linear logistic regression +lrnr_baseline <- Lrnr_glmnet$new() +family <- binomial() +lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_binary) +preds <- lrnr_glm_sp_binomial$predict(task_binary) +beta <- lrnr_glm_sp_binomial$fit_object$coefficients + +# fit partially-linear log-link (relative-risk) regression +lrnr_baseline <- Lrnr_glmnet$new(family = "poisson") # This setting requires that lrnr_baseline predicts nonnegative values. It is recommended to use poisson regression based learners. +family <- poisson() +lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_count) +preds <- lrnr_glm_sp_binomial$predict(task_count) +beta <- lrnr_glm_sp_binomial$fit_object$coefficients + +# +} \concept{Learners} \keyword{data} diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index d18b1c53..56748b6f 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -89,6 +89,89 @@ print(rowMeans(passes1)) } ``` +```{r} +#' library(glmnet) +#' n <- 200 +#' W <- runif(n, -1, 1) +#' A <- rbinom(n, 1, plogis(W)) +#' Y_continuous <- rnorm(n, mean = A+W, sd = 0.3) +#' Y_binary <- rbinom(n, 1, plogis(A + W)) +#' Y_count <- rpois(n, exp(A+W)) +#' data <- data.table(W,A,Y_continuous, Y_binary, Y_count) + +#' # Make tasks +#' task_continuous <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_continuous") +#' task_binary <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_binary") +#' task_count <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_count", outcome_type = "continuous") +#' +#' formula_sp <- ~ 1 + W +#' +# fit partially-linear least-squares regression with `append_interaction_matrix = TRUE` +#' set.seed(100) +#' lrnr_baseline <- Lrnr_glmnet$new() +#' family <- gaussian() +#' lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +#' lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) +#' preds <- lrnr_glm_sp_gaussian$predict(task_continuous) +#' beta <- lrnr_glm_sp_gaussian$fit_object$coefficients +## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +#' V <- model.matrix(formula_sp, task_continuous$data) +#' X <- cbind(task_continuous$data[["W"]], task_continuous$data[["A"]]*V) +#' X0 <- cbind(task_continuous$data[["W"]], 0*V) +#' colnames(X) <- c("W", "A", "A*W") +#' Y <- task_continuous$Y +#' set.seed(100) +#' beta_equiv <- coef(cv.glmnet(X, Y, family = "gaussian"), s = "lambda.min")[c(3,4)] +#' print(beta - beta_equiv) ## Actually, the glmnet fit is projected onto the semiparametric model using +#' glm.fit. This has no effect on glmnet since it is linear. For nonlinear learners like Lrnr_gam or Lrnr_xgboost this projection has an effect. +#' +#' # fit partially-linear least-squares regression with `append_interaction_matrix = FALSE` +#' set.seed(100) +#' lrnr_baseline <- Lrnr_glm$new(family = gaussian()) +#' family <- gaussian() +#' lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = FALSE) +#' lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) +#' preds <- lrnr_glm_sp_gaussian$predict(task_continuous) +#' beta <- lrnr_glm_sp_gaussian$fit_object$coefficients +#' +## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +#' subset_to <- task_continuous$data[["A"]]==0 # Subset to baseline treatment arm +#' V <- model.matrix(formula_sp, task_continuous$data) +#' X <- cbind(rep(1,n), task_continuous$data[["W"]]) +#' Y <- task_continuous$Y +#' set.seed(100) +#' beta_Y0W <- lrnr_glm_sp_gaussian$fit_object$lrnr_baseline$fit_object$coefficients +#' beta_Y0W_equiv <- coef(glm.fit(X[subset_to,,drop=F], Y[subset_to], family = gaussian())) # Subset to baseline treatment arm +#' EY0 <- X %*% beta_Y0W +#' beta_equiv <- coef(glm.fit(A*V, Y, offset = EY0, family = gaussian())) +#' print(beta_Y0W - beta_Y0W_equiv) +#' print(beta - beta_equiv) + + + +#' # fit partially-linear logistic regression +#' lrnr_baseline <- Lrnr_glmnet$new() +#' family <- binomial() +#' lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +#' lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_binary) +#' preds <- lrnr_glm_sp_binomial$predict(task_binary) +#' beta <- lrnr_glm_sp_binomial$fit_object$coefficients + + +#' # fit partially-linear log-link (elative-risk) regression +#' lrnr_baseline <- Lrnr_glmnet$new(family = "poisson") # This setting requires that lrnr_baseline predicts nonnegative values. It is recommended to use poisson regression based learners. +#' family <- poisson() +#' lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) +#' lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_count) +#' preds <- lrnr_glm_sp_binomial$predict(task_count) +#' beta <- lrnr_glm_sp_binomial$fit_object$coefficients + + +``` + + + + ```{r} library(sl3) From de1a3f1a996b7e3990052108a5fb289f1dd87341 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 14:29:27 -0700 Subject: [PATCH 29/65] format --- R/Lrnr_glm_semiparametric.R | 2 +- R/tmle3_Spec_npCausalGLM.R | 6 +++++- R/tmle3_spec_spCausalGLM.R | 2 ++ man/Lrnr_glm_semiparametric.Rd | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index 59eaa648..9b2a7185 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -81,7 +81,7 @@ #' lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) #' preds <- lrnr_glm_sp_gaussian$predict(task_continuous) #' beta <- lrnr_glm_sp_gaussian$fit_object$coefficients -#' ## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +#' ## In this case, since `append_interaction_matrix = FALSE`, it is equivalent to: #' ## Subset to baseline treatment arm #' subset_to <- task_continuous$data[["A"]] == 0 #' diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 9e3fc59f..7c465339 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -25,7 +25,11 @@ tmle3_Spec_npCausalGLM <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types include_variance_node <- FALSE - + if (self$options$estimand == "RR") { + variable_types <- list(Y = variable_type("continuous")) + } else if (self$options$estimand == "OR") { + variable_types <- list(Y = variable_type("binomial")) + } tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) return(tmle_task) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index bce54d77..1e3ead01 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -27,6 +27,8 @@ tmle3_Spec_spCausalGLM <- R6Class( include_variance_node <- self$options$estimand == "CATE" if (self$options$estimand == "RR") { variable_types <- list(Y = variable_type("continuous")) + } else if (self$options$estimand == "OR") { + variable_types <- list(Y = variable_type("binomial")) } tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) diff --git a/man/Lrnr_glm_semiparametric.Rd b/man/Lrnr_glm_semiparametric.Rd index ec0c378f..a3c48003 100644 --- a/man/Lrnr_glm_semiparametric.Rd +++ b/man/Lrnr_glm_semiparametric.Rd @@ -80,7 +80,7 @@ lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, fam lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) preds <- lrnr_glm_sp_gaussian$predict(task_continuous) beta <- lrnr_glm_sp_gaussian$fit_object$coefficients -## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: +## In this case, since `append_interaction_matrix = FALSE`, it is equivalent to: ## Subset to baseline treatment arm subset_to <- task_continuous$data[["A"]] == 0 From 759dca0794fca51ed349bc91caa72adb45c7549e Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 14:51:35 -0700 Subject: [PATCH 30/65] fix documentation bug --- R/Lrnr_glm_semiparametric.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index 9b2a7185..daa79998 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -23,18 +23,24 @@ #' #' @section Parameters: #' \describe{ -#' \item{\code{formula_sp}}{ A \code{\link{formula}} object specifying the parametric component of the semiparametric model.} +#' \item{\code{formula_sp}}{ A \code{\link{formula}} object specifying the parametric component of the semiparametric model. +#' } #' \item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component. This can be pooled or unpooled by specifying \code{return_matrix_predictions}} #' \item{\code{interaction_variable = "A"}}{A interaction variable name (that can be found in `training_task$data`) to multiply by the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated identically `1`. -#' In many applications, this will be the name of a binary treatment variable (e.g. `A`).} -#' \item{\code{family = NULL}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{\link{gaussian}), partially-linear logistic regression (\code{\link{binomial}), partially-linear log-linear regression (\code{\link{poisson}) } +#' In many applications, this will be the name of a binary treatment variable (e.g. `A`). +#' } +#' \item{\code{family = NULL}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{\link{gaussian}}), partially-linear logistic regression (\code{\link{binomial}}), partially-linear log-linear regression (\code{\link{poisson}}) +#' } #' \item{\code{append_interaction_matrix = TRUE}}{Whether \code{lrnr_baseline} should be fit on `cbind(task$X,A*V)` where `A` is the interaction variable and `V` is the design matrix obtained from \code{formula_sp}. #' Note, if `append_interaction_matrix = TRUE`, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. #' If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. #' Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A = 0`. -#' In the binary case, setting `append_interaction_matrix = TRUE` allows one to pool the learning across treatment arms and allows additive models to perform well. } -#' \item{\code{return_matrix_predictions = FALSE}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`.} -#' \item{\code{...}}{Not used.} +#' In the binary case, setting `append_interaction_matrix = TRUE` allows one to pool the learning across treatment arms and allows additive models to perform well. +#' } +#' \item{\code{return_matrix_predictions = FALSE}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`. +#' } +#' \item{\code{...}}{Not used. +#' } #' } #' #' @examples From d90cb6e0e9817e9ae308b2966676ca819956c57d Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 14:55:32 -0700 Subject: [PATCH 31/65] remove glm_sp docs --- R/Lrnr_glm_semiparametric.R | 114 --------------------------------- man/Lrnr_glm_semiparametric.Rd | 107 ------------------------------- 2 files changed, 221 deletions(-) diff --git a/R/Lrnr_glm_semiparametric.R b/R/Lrnr_glm_semiparametric.R index daa79998..74b363bd 100644 --- a/R/Lrnr_glm_semiparametric.R +++ b/R/Lrnr_glm_semiparametric.R @@ -4,121 +4,7 @@ #' \code{\link[stats]{glm.fit}}. It supports models of the form `linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)` where `A` is a binary or continuous interaction variable, #' and `f(W)` is a user-specified parametric function (e.g. `f(W) = model.matrix(formula_sp, W)`). The baseline function `E[Y|A=0,W]` is fit using a user-specified Learner (possibly pooled over values of `A` and then projected onto the semiparametric model). #' -#' @docType class -#' -#' @importFrom R6 R6Class -#' #' @export -#' -#' @keywords data -#' -#' @return A learner object inheriting from \code{\link{Lrnr_base}} with -#' methods for training and prediction. For a full list of learner -#' functionality, see the complete documentation of \code{\link{Lrnr_base}}. -#' -#' @format An \code{\link[R6]{R6Class}} object inheriting from -#' \code{\link{Lrnr_base}}. -#' -#' @family Learners -#' -#' @section Parameters: -#' \describe{ -#' \item{\code{formula_sp}}{ A \code{\link{formula}} object specifying the parametric component of the semiparametric model. -#' } -#' \item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component. This can be pooled or unpooled by specifying \code{return_matrix_predictions}} -#' \item{\code{interaction_variable = "A"}}{A interaction variable name (that can be found in `training_task$data`) to multiply by the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated identically `1`. -#' In many applications, this will be the name of a binary treatment variable (e.g. `A`). -#' } -#' \item{\code{family = NULL}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{\link{gaussian}}), partially-linear logistic regression (\code{\link{binomial}}), partially-linear log-linear regression (\code{\link{poisson}}) -#' } -#' \item{\code{append_interaction_matrix = TRUE}}{Whether \code{lrnr_baseline} should be fit on `cbind(task$X,A*V)` where `A` is the interaction variable and `V` is the design matrix obtained from \code{formula_sp}. -#' Note, if `append_interaction_matrix = TRUE`, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. -#' If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. -#' Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A = 0`. -#' In the binary case, setting `append_interaction_matrix = TRUE` allows one to pool the learning across treatment arms and allows additive models to perform well. -#' } -#' \item{\code{return_matrix_predictions = FALSE}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`. -#' } -#' \item{\code{...}}{Not used. -#' } -#' } -#' -#' @examples -#' library(glmnet) -#' n <- 200 -#' W <- runif(n, -1, 1) -#' A <- rbinom(n, 1, plogis(W)) -#' Y_continuous <- rnorm(n, mean = A + W, sd = 0.3) -#' Y_binary <- rbinom(n, 1, plogis(A + W)) -#' Y_count <- rpois(n, exp(A + W)) -#' data <- data.table(W, A, Y_continuous, Y_binary, Y_count) -#' -#' # Make tasks -#' task_continuous <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_continuous") -#' task_binary <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_binary") -#' task_count <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_count", outcome_type = "continuous") -#' -#' formula_sp <- ~ 1 + W -#' -#' # fit partially-linear least-squares regression with `append_interaction_matrix = TRUE` -#' set.seed(100) -#' lrnr_baseline <- Lrnr_glmnet$new() -#' family <- gaussian() -#' lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) -#' lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) -#' preds <- lrnr_glm_sp_gaussian$predict(task_continuous) -#' beta <- lrnr_glm_sp_gaussian$fit_object$coefficients -#' ## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: -#' V <- model.matrix(formula_sp, task_continuous$data) -#' X <- cbind(task_continuous$data[["W"]], task_continuous$data[["A"]] * V) -#' X0 <- cbind(task_continuous$data[["W"]], 0 * V) -#' colnames(X) <- c("W", "A", "A*W") -#' Y <- task_continuous$Y -#' set.seed(100) -#' beta_equiv <- coef(cv.glmnet(X, Y, family = "gaussian"), s = "lambda.min")[c(3, 4)] -#' ## Actually, the glmnet fit is projected onto the semiparametric model with glm.fit (no effect in this case) -#' print(beta - beta_equiv) -#' -#' # fit partially-linear least-squares regression with `append_interaction_matrix = FALSE` -#' set.seed(100) -#' lrnr_baseline <- Lrnr_glm$new(family = gaussian()) -#' family <- gaussian() -#' lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = FALSE) -#' lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) -#' preds <- lrnr_glm_sp_gaussian$predict(task_continuous) -#' beta <- lrnr_glm_sp_gaussian$fit_object$coefficients -#' ## In this case, since `append_interaction_matrix = FALSE`, it is equivalent to: -#' ## Subset to baseline treatment arm -#' subset_to <- task_continuous$data[["A"]] == 0 -#' -#' V <- model.matrix(formula_sp, task_continuous$data) -#' X <- cbind(rep(1, n), task_continuous$data[["W"]]) -#' Y <- task_continuous$Y -#' set.seed(100) -#' beta_Y0W <- lrnr_glm_sp_gaussian$fit_object$lrnr_baseline$fit_object$coefficients -#' beta_Y0W_equiv <- coef(glm.fit(X[subset_to, , drop = F], Y[subset_to], family = gaussian())) # Subset to baseline treatment arm -#' EY0 <- X %*% beta_Y0W -#' beta_equiv <- coef(glm.fit(A * V, Y, offset = EY0, family = gaussian())) -#' print(beta_Y0W - beta_Y0W_equiv) -#' print(beta - beta_equiv) -#' -#' # fit partially-linear logistic regression -#' lrnr_baseline <- Lrnr_glmnet$new() -#' family <- binomial() -#' lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) -#' lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_binary) -#' preds <- lrnr_glm_sp_binomial$predict(task_binary) -#' beta <- lrnr_glm_sp_binomial$fit_object$coefficients -#' -#' # fit partially-linear log-link (relative-risk) regression -#' lrnr_baseline <- Lrnr_glmnet$new(family = "poisson") # This setting requires that lrnr_baseline predicts nonnegative values. It is recommended to use poisson regression based learners. -#' family <- poisson() -#' lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) -#' lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_count) -#' preds <- lrnr_glm_sp_binomial$predict(task_count) -#' beta <- lrnr_glm_sp_binomial$fit_object$coefficients -#' -#' # Lrnr_glm_semiparametric <- R6Class( classname = "Lrnr_glm_semiparametric", inherit = Lrnr_base, portable = TRUE, class = TRUE, diff --git a/man/Lrnr_glm_semiparametric.Rd b/man/Lrnr_glm_semiparametric.Rd index a3c48003..ddb891c1 100644 --- a/man/Lrnr_glm_semiparametric.Rd +++ b/man/Lrnr_glm_semiparametric.Rd @@ -1,117 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Lrnr_glm_semiparametric.R -\docType{class} \name{Lrnr_glm_semiparametric} \alias{Lrnr_glm_semiparametric} \title{Semiparametric Generalized Linear Models} -\format{ -An \code{\link[R6]{R6Class}} object inheriting from -\code{\link{Lrnr_base}}. -} -\value{ -A learner object inheriting from \code{\link{Lrnr_base}} with -methods for training and prediction. For a full list of learner -functionality, see the complete documentation of \code{\link{Lrnr_base}}. -} \description{ This learner provides fitting procedures for semiparametric generalized linear models using a user-given baseline learner and \code{\link[stats]{glm.fit}}. It supports models of the form \verb{linkfun(E[Y|A,W]) = linkfun(E[Y|A=0,W]) + A * f(W)} where \code{A} is a binary or continuous interaction variable, and \code{f(W)} is a user-specified parametric function (e.g. \code{f(W) = model.matrix(formula_sp, W)}). The baseline function \verb{E[Y|A=0,W]} is fit using a user-specified Learner (possibly pooled over values of \code{A} and then projected onto the semiparametric model). } -\section{Parameters}{ - -\describe{ -\item{\code{formula_sp}}{ A \code{\link{formula}} object specifying the parametric component of the semiparametric model.} -\item{\code{lrnr_baseline}}{A baseline learner for estimation of the nonparametric component. This can be pooled or unpooled by specifying \code{return_matrix_predictions}} -\item{\code{interaction_variable = "A"}}{A interaction variable name (that can be found in \code{training_task$data}) to multiply by the design matrix generated by \code{formula_sp}. If NULL then the interaction variable is treated identically \code{1}. -In many applications, this will be the name of a binary treatment variable (e.g. \code{A}).} -\item{\code{family = NULL}}{A family object whose link function specifies the type of semiparametric model (e.g. partially-linear least-squares (\code{\link{gaussian}), partially-linear logistic regression (\code{\link{binomial}), partially-linear log-linear regression (\code{\link{poisson}) } - \item{\code{append_interaction_matrix = TRUE}}{Whether \code{lrnr_baseline} should be fit on `cbind(task$X,A*V)` where `A` is the interaction variable and `V` is the design matrix obtained from \code{formula_sp}. - Note, if `append_interaction_matrix = TRUE`, the resulting estimator will be projected onto the semiparametric model using \code{glm.fit}. - If this is FALSE and \code{interaction_variable} is binary then the semiparametric model is learned by stratifying on \code{interaction_variable}. - Specifically, if FALSE, \code{lrnr_baseline} is used to estimate `E[Y|A=0,W]` by subsetting to only observations with `A = 0`. - In the binary case, setting `append_interaction_matrix = TRUE` allows one to pool the learning across treatment arms and allows additive models to perform well. } - \item{\code{return_matrix_predictions = FALSE}}{Only used if \code{interaction_variable} is binary. Whether to return a matrix output with three columns being `E[Y|A=0,W], E[Y|A=1,W], E[Y|A,W]`.} - \item{\code{...}}{Not used.} -} -} - -\examples{ -library(glmnet) -n <- 200 -W <- runif(n, -1, 1) -A <- rbinom(n, 1, plogis(W)) -Y_continuous <- rnorm(n, mean = A + W, sd = 0.3) -Y_binary <- rbinom(n, 1, plogis(A + W)) -Y_count <- rpois(n, exp(A + W)) -data <- data.table(W, A, Y_continuous, Y_binary, Y_count) - -# Make tasks -task_continuous <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_continuous") -task_binary <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_binary") -task_count <- sl3_Task$new(data, covariates = c("A", "W"), outcome = "Y_count", outcome_type = "continuous") - -formula_sp <- ~ 1 + W - -# fit partially-linear least-squares regression with `append_interaction_matrix = TRUE` -set.seed(100) -lrnr_baseline <- Lrnr_glmnet$new() -family <- gaussian() -lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) -lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) -preds <- lrnr_glm_sp_gaussian$predict(task_continuous) -beta <- lrnr_glm_sp_gaussian$fit_object$coefficients -## In this case, since `append_interaction_matrix = TRUE`, it is equivalent to: -V <- model.matrix(formula_sp, task_continuous$data) -X <- cbind(task_continuous$data[["W"]], task_continuous$data[["A"]] * V) -X0 <- cbind(task_continuous$data[["W"]], 0 * V) -colnames(X) <- c("W", "A", "A*W") -Y <- task_continuous$Y -set.seed(100) -beta_equiv <- coef(cv.glmnet(X, Y, family = "gaussian"), s = "lambda.min")[c(3, 4)] -## Actually, the glmnet fit is projected onto the semiparametric model with glm.fit (no effect in this case) -print(beta - beta_equiv) - -# fit partially-linear least-squares regression with `append_interaction_matrix = FALSE` -set.seed(100) -lrnr_baseline <- Lrnr_glm$new(family = gaussian()) -family <- gaussian() -lrnr_glm_sp_gaussian <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = FALSE) -lrnr_glm_sp_gaussian <- lrnr_glm_sp_gaussian$train(task_continuous) -preds <- lrnr_glm_sp_gaussian$predict(task_continuous) -beta <- lrnr_glm_sp_gaussian$fit_object$coefficients -## In this case, since `append_interaction_matrix = FALSE`, it is equivalent to: -## Subset to baseline treatment arm -subset_to <- task_continuous$data[["A"]] == 0 - -V <- model.matrix(formula_sp, task_continuous$data) -X <- cbind(rep(1, n), task_continuous$data[["W"]]) -Y <- task_continuous$Y -set.seed(100) -beta_Y0W <- lrnr_glm_sp_gaussian$fit_object$lrnr_baseline$fit_object$coefficients -beta_Y0W_equiv <- coef(glm.fit(X[subset_to, , drop = F], Y[subset_to], family = gaussian())) # Subset to baseline treatment arm -EY0 <- X \%*\% beta_Y0W -beta_equiv <- coef(glm.fit(A * V, Y, offset = EY0, family = gaussian())) -print(beta_Y0W - beta_Y0W_equiv) -print(beta - beta_equiv) - -# fit partially-linear logistic regression -lrnr_baseline <- Lrnr_glmnet$new() -family <- binomial() -lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) -lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_binary) -preds <- lrnr_glm_sp_binomial$predict(task_binary) -beta <- lrnr_glm_sp_binomial$fit_object$coefficients - -# fit partially-linear log-link (relative-risk) regression -lrnr_baseline <- Lrnr_glmnet$new(family = "poisson") # This setting requires that lrnr_baseline predicts nonnegative values. It is recommended to use poisson regression based learners. -family <- poisson() -lrnr_glm_sp_binomial <- Lrnr_glm_semiparametric$new(formula_sp = formula_sp, family = family, lrnr_baseline = lrnr_baseline, interaction_variable = "A", append_interaction_matrix = TRUE) -lrnr_glm_sp_binomial <- lrnr_glm_sp_binomial$train(task_count) -preds <- lrnr_glm_sp_binomial$predict(task_count) -beta <- lrnr_glm_sp_binomial$fit_object$coefficients - -# -} -\concept{Learners} -\keyword{data} From 63a41a1bd75e3613c91ed80faa3944edc5368684 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 15:02:32 -0700 Subject: [PATCH 32/65] changes --- R/tmle3_spec_spCausalGLM.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 1e3ead01..10aa9e4d 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -11,13 +11,14 @@ tmle3_Spec_spCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "OR", "RR"), treatment_level = 1, control_level = 0, + initialize = function(formula, estimand = c("CATE", "OR", "RR"), treatment_level = 1, control_level = 0, append_interaction_matrix = TRUE, wrap_in_Lrnr_glm_sp = TRUE, likelihood_override = NULL, variable_types = NULL, ...) { estimand <- match.arg(estimand) private$.options <- list( estimand = estimand, formula = formula, treatment_level = treatment_level, control_level = control_level, + append_interaction_matrix = append_interaction_matrix, wrap_in_Lrnr_glm_sp = wrap_in_Lrnr_glm_sp, likelihood_override = likelihood_override, variable_types = variable_types, ... ) @@ -34,8 +35,10 @@ tmle3_Spec_spCausalGLM <- R6Class( return(tmle_task) }, - make_initial_likelihood = function(tmle_task, learner_list = NULL, append_interaction_matrix = TRUE, wrap_in_Lrnr_glm_sp = TRUE) { + make_initial_likelihood = function(tmle_task, learner_list = NULL ) { # Wrap baseline learner in semiparametric learner + wrap_in_Lrnr_glm_sp <- self$options$wrap_in_Lrnr_glm_sp + append_interaction_matrix <- self$options$append_interaction_matrix if (wrap_in_Lrnr_glm_sp) { learner_list[["Y"]] <- Lrnr_glm_semiparametric$new(formula_sp = self$options$formula, family = self$family, interaction_variable = "A", lrnr_baseline = learner_list[["Y"]], append_interaction_matrix = append_interaction_matrix) } From fca50f61630a76f64562e870b67c1ea86226d152 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 15:49:10 -0700 Subject: [PATCH 33/65] changes --- R/tmle3_spec_spCausalGLM.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 10aa9e4d..df438195 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -32,7 +32,7 @@ tmle3_Spec_spCausalGLM <- R6Class( variable_types <- list(Y = variable_type("binomial")) } tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) - + private$.node_list <- node_list return(tmle_task) }, make_initial_likelihood = function(tmle_task, learner_list = NULL ) { @@ -40,7 +40,7 @@ tmle3_Spec_spCausalGLM <- R6Class( wrap_in_Lrnr_glm_sp <- self$options$wrap_in_Lrnr_glm_sp append_interaction_matrix <- self$options$append_interaction_matrix if (wrap_in_Lrnr_glm_sp) { - learner_list[["Y"]] <- Lrnr_glm_semiparametric$new(formula_sp = self$options$formula, family = self$family, interaction_variable = "A", lrnr_baseline = learner_list[["Y"]], append_interaction_matrix = append_interaction_matrix) + learner_list[["Y"]] <- Lrnr_glm_semiparametric$new(formula_sp = self$options$formula, family = self$family, interaction_variable = private$.node_list$A, lrnr_baseline = learner_list[["Y"]], append_interaction_matrix = append_interaction_matrix) } # produce trained likelihood when likelihood_def provided if (!is.null(self$options$likelihood_override)) { @@ -96,6 +96,7 @@ tmle3_Spec_spCausalGLM <- R6Class( ), private = list( .options = NULL, - .families = list("CATE" = gaussian(), "RR" = poisson(), "OR" = binomial()) + .families = list("CATE" = gaussian(), "RR" = poisson(), "OR" = binomial()), + .node_list = NULL ) ) From 7253a6c4d081069e046ec4d6f07b346f84996eca Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 15:59:46 -0700 Subject: [PATCH 34/65] changes --- R/helpers_point_treatment.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/helpers_point_treatment.R b/R/helpers_point_treatment.R index 02ab6814..873c0331 100644 --- a/R/helpers_point_treatment.R +++ b/R/helpers_point_treatment.R @@ -94,15 +94,13 @@ point_tx_likelihood <- function(tmle_task, learner_list) { } if (tmle_task$npsem[["Y"]]$variable_type$type == "binomial") { mean_fun <- function(task, likelihood, tmle_task) { - EY <- sl3::unpack_predictions(likelihood$get_likelihood(tmle_task, "Y")) - EY <- EY[, ncol(EY)] + EY <- likelihood$get_likelihood(tmle_task, "Y") return(EY * (1 - EY)) } LF_var_Y <- LF_known$new("var_Y", mean_fun = mean_fun, base_likelihood = likelihood, type = "mean") } else { task_generator <- function(tmle_task, base_likelihood) { - EY <- sl3::unpack_predictions(base_likelihood$get_likelihood(tmle_task, "Y")) - EY <- EY[, ncol(EY)] + EY <- base_likelihood$get_likelihood(tmle_task, "Y") Y <- tmle_task$get_tmle_node("Y", format = TRUE)[[1]] outcome <- (Y - EY)^2 task <- tmle_task$get_regression_task("Y") From 436581fbc6535b3279f3cac47673a31fdccd8789 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 16:34:21 -0700 Subject: [PATCH 35/65] change to default for spCausal --- R/helpers_point_treatment.R | 4 ++-- R/tmle3_spec_spCausalGLM.R | 13 +++++++++---- vignettes/testing.Rmd | 6 +++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/R/helpers_point_treatment.R b/R/helpers_point_treatment.R index 873c0331..e385e965 100644 --- a/R/helpers_point_treatment.R +++ b/R/helpers_point_treatment.R @@ -93,8 +93,8 @@ point_tx_likelihood <- function(tmle_task, learner_list) { warning("Node var_Y is in npsem but no learner is provided in `learner_list`. Defaulting to glmnet with `poisson` family.") } if (tmle_task$npsem[["Y"]]$variable_type$type == "binomial") { - mean_fun <- function(task, likelihood, tmle_task) { - EY <- likelihood$get_likelihood(tmle_task, "Y") + mean_fun <- function(task, tmle_task, likelihood) { + EY <- likelihood$get_likelihood(tmle_task, "Y") return(EY * (1 - EY)) } LF_var_Y <- LF_known$new("var_Y", mean_fun = mean_fun, base_likelihood = likelihood, type = "mean") diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index df438195..19f996be 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -11,12 +11,15 @@ tmle3_Spec_spCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "OR", "RR"), treatment_level = 1, control_level = 0, append_interaction_matrix = TRUE, wrap_in_Lrnr_glm_sp = TRUE, + initialize = function(formula, estimand = c("CATE", "OR", "RR"), binary_outcome = FALSE, treatment_level = 1, control_level = 0, append_interaction_matrix = TRUE, wrap_in_Lrnr_glm_sp = TRUE, likelihood_override = NULL, variable_types = NULL, ...) { estimand <- match.arg(estimand) + if (binary_outcome && estimand %in% c("CATE")) { + append_interaction_matrix <- FALSE + } private$.options <- list( - estimand = estimand, formula = formula, + estimand = estimand, formula = formula, , binary_outcome = binary_outcome, treatment_level = treatment_level, control_level = control_level, append_interaction_matrix = append_interaction_matrix, wrap_in_Lrnr_glm_sp = wrap_in_Lrnr_glm_sp, likelihood_override = likelihood_override, @@ -26,8 +29,10 @@ tmle3_Spec_spCausalGLM <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types include_variance_node <- self$options$estimand == "CATE" - if (self$options$estimand == "RR") { + if (self$options$estimand %in% c("RR", "CATE") && !self$options$binary_outcome) { variable_types <- list(Y = variable_type("continuous")) + } else if (self$options$estimand %in% c("CATE") && self$options$binary_outcome) { + variable_types <- list(Y = variable_type("binomial")) } else if (self$options$estimand == "OR") { variable_types <- list(Y = variable_type("binomial")) } @@ -35,7 +40,7 @@ tmle3_Spec_spCausalGLM <- R6Class( private$.node_list <- node_list return(tmle_task) }, - make_initial_likelihood = function(tmle_task, learner_list = NULL ) { + make_initial_likelihood = function(tmle_task, learner_list = NULL) { # Wrap baseline learner in semiparametric learner wrap_in_Lrnr_glm_sp <- self$options$wrap_in_Lrnr_glm_sp append_interaction_matrix <- self$options$append_interaction_matrix diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 56748b6f..b4c4ecf5 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -15,19 +15,19 @@ passes <- c() passes1 <- c() passes2 <- c() -for(i in 1:200){ +for(i in 1:1){ print(i) n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) -Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) +Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) data <- data.table(W,A,Y) lrnr_Y0W <- Lrnr_gam$new() lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") -learner_list <- list(A = lrnr_A, Y = lrnr_Y0W, var_Y = Lrnr_mean$new()) +learner_list <- list(A = lrnr_A, Y = lrnr_Y0W ) # spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") # out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATE") From 03f22c72702df8d9d945b205ef96eee599594f2d Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 16:36:56 -0700 Subject: [PATCH 36/65] change to default for spCausal --- R/tmle3_spec_spCausalGLM.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 19f996be..cb6d9627 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -11,13 +11,10 @@ tmle3_Spec_spCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "OR", "RR"), binary_outcome = FALSE, treatment_level = 1, control_level = 0, append_interaction_matrix = TRUE, wrap_in_Lrnr_glm_sp = TRUE, + initialize = function(formula, estimand = c("CATE", "OR", "RR"), binary_outcome = FALSE, treatment_level = 1, control_level = 0, append_interaction_matrix = !(binary_outcome && estimand %in% c("CATE")), wrap_in_Lrnr_glm_sp = TRUE, likelihood_override = NULL, variable_types = NULL, ...) { estimand <- match.arg(estimand) - if (binary_outcome && estimand %in% c("CATE")) { - append_interaction_matrix <- FALSE - } private$.options <- list( estimand = estimand, formula = formula, , binary_outcome = binary_outcome, treatment_level = treatment_level, control_level = control_level, From c19995e148bb64ef486fb55bf676380c47ca031f Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 16:39:14 -0700 Subject: [PATCH 37/65] change to default for spCausal --- R/tmle3_spec_spCausalGLM.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index cb6d9627..c428a605 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -16,7 +16,7 @@ tmle3_Spec_spCausalGLM <- R6Class( variable_types = NULL, ...) { estimand <- match.arg(estimand) private$.options <- list( - estimand = estimand, formula = formula, , binary_outcome = binary_outcome, + estimand = estimand, formula = formula, binary_outcome = binary_outcome, treatment_level = treatment_level, control_level = control_level, append_interaction_matrix = append_interaction_matrix, wrap_in_Lrnr_glm_sp = wrap_in_Lrnr_glm_sp, likelihood_override = likelihood_override, From e4ee0f56b62ab8a26901c85b0233a83a813e2fe3 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 16:55:05 -0700 Subject: [PATCH 38/65] change to default for spCausal --- R/Param_npCATE.R | 2 +- R/Param_npCATT.R | 4 ++-- R/Param_npOR.R | 2 +- R/Param_spCATE.R | 2 +- R/Param_spOR.R | 2 +- R/Param_spRR.R | 2 +- vignettes/testing.Rmd | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 5e86cb76..d3a5aefa 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -166,7 +166,7 @@ Param_npCATE <- R6Class( ), active = list( name = function() { - param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index 5ebc522a..0ae93541 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -168,7 +168,7 @@ Param_npCATT <- R6Class( ), active = list( name = function() { - param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- sprintf("CATT[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) return(param_form) }, cf_likelihood_treatment = function() { @@ -191,7 +191,7 @@ Param_npCATT <- R6Class( } ), private = list( - .type = "CATE", + .type = "CATT", .cf_likelihood_treatment = NULL, .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 899d4eab..5e1070c4 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -168,7 +168,7 @@ Param_npOR <- R6Class( ), active = list( name = function() { - param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- sprintf("log({P(Y=1|A=1,W)/P(Y=0|A=1,W)}/{P(Y=1|A=0,W)/P(Y=0|A=0,W)})") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index cc281afc..3c0a50f3 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -171,7 +171,7 @@ Param_spCATE <- R6Class( ), active = list( name = function() { - param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spOR.R b/R/Param_spOR.R index 81f1a487..5dfb159b 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -150,7 +150,7 @@ Param_spOR <- R6Class( ), active = list( name = function() { - param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- sprintf("log({P(Y=1|A=1,W)/P(Y=0|A=1,W)}/{P(Y=1|A=0,W)/P(Y=0|A=0,W)})") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 7731dc74..a18c7881 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -155,7 +155,7 @@ Param_spRR <- R6Class( ), active = list( name = function() { - param_form <- sprintf("ATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- sprintf("log(E[Y|A=1,W]/E[Y|A=0,W])") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index b4c4ecf5..c73abed7 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -21,7 +21,7 @@ for(i in 1:1){ n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) -Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) +Y <- rbinom(n, size = 1, prob = plogis(W)) #rnorm(n, mean = A*W + A+W, sd = 0.3) data <- data.table(W,A,Y) lrnr_Y0W <- Lrnr_gam$new() lrnr_A <- Lrnr_gam$new() From da0e4a80c8efd74f061a1d095e1346c287f56132 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 17:03:45 -0700 Subject: [PATCH 39/65] change to default for spCausal --- R/Param_npOR.R | 4 ++-- R/Param_spOR.R | 4 ++-- R/Param_spRR.R | 2 +- vignettes/testing.Rmd | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 5e1070c4..a482f348 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -162,13 +162,13 @@ Param_npOR <- R6Class( IC <- EIF - result <- list(psi = beta, IC = IC, OR = OR) + result <- list(psi = beta, IC = IC, OR = OR, transform = exp) return(result) } ), active = list( name = function() { - param_form <- sprintf("log({P(Y=1|A=1,W)/P(Y=0|A=1,W)}/{P(Y=1|A=0,W)/P(Y=0|A=0,W)})") + param_form <- sprintf("logOR(Y,A|W)") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spOR.R b/R/Param_spOR.R index 5dfb159b..c477e552 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -144,13 +144,13 @@ Param_spOR <- R6Class( IC <- as.matrix(EIF) - result <- list(psi = beta, IC = IC, OR = OR) + result <- list(psi = beta, IC = IC, OR = OR, transform = exp) return(result) } ), active = list( name = function() { - param_form <- sprintf("log({P(Y=1|A=1,W)/P(Y=0|A=1,W)}/{P(Y=1|A=0,W)/P(Y=0|A=0,W)})") + param_form <- sprintf("logOR(Y,A|W)") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spRR.R b/R/Param_spRR.R index a18c7881..9d4f685a 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -149,7 +149,7 @@ Param_spRR <- R6Class( IC <- as.matrix(EIF) - result <- list(psi = beta, IC = IC, RR = RR) + result <- list(psi = beta, IC = IC, RR = RR, transform = exp) return(result) } ), diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index c73abed7..85ad1073 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -56,11 +56,11 @@ print(rowMeans(passes2)) -```{r, include = F} +```{r, include = T} passes <- c() passes1 <- c() -for(i in 1:200){ +for(i in 1:1){ print(i) library(sl3) n <- 500 @@ -77,7 +77,7 @@ spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) out <- out$summary passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) - +print(out) spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "OR") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) From fc438e426c06c745ae5af57bba1a42b68ba520f0 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 17:16:58 -0700 Subject: [PATCH 40/65] change to default for spCausal --- R/Param_npOR.R | 1 + R/tmle3_Spec_npCausalGLM.R | 4 ++-- R/tmle3_spec_spCausalGLM.R | 4 ++-- vignettes/testing.Rmd | 6 +++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/Param_npOR.R b/R/Param_npOR.R index a482f348..31d47594 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -157,6 +157,7 @@ Param_npOR <- R6Class( Q0 <- bound(Q0, 0.0005) Q1 <- bound(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logOR, Q1, Q0, family = binomial(), weights = weights) + V <- model.matrix(self$formula_logOR, as.data.frame(W)) OR <- exp(V %*% beta) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 7c465339..0d05bb43 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -50,9 +50,9 @@ tmle3_Spec_npCausalGLM <- R6Class( if (self$options$estimand == "CATE" || self$options$estimand == "CATT") { updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) } return(updater) }, diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index c428a605..e46ae5e9 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -57,9 +57,9 @@ tmle3_Spec_spCausalGLM <- R6Class( if (self$options$estimand == "CATE") { updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = 0.0025, ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.01, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) } return(updater) }, diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 85ad1073..27289b9e 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -69,8 +69,8 @@ A <- rbinom(n, size = 1, prob = plogis(0)) Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_gam$new() -lrnr_A <- Lrnr_gam$new() +lrnr_Y0W <- Lrnr_glm$new() +lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") @@ -83,7 +83,7 @@ spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "OR") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) - +print(out) print(rowMeans(passes)) print(rowMeans(passes1)) } From e21891606cadbea8c75db732c443c64a4a01c3c4 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 20:20:46 -0700 Subject: [PATCH 41/65] fix bug tmle3_fit initial est if no full fit --- R/tmle3_Fit.R | 2 +- R/tmle3_spec_spCausalGLM.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/tmle3_Fit.R b/R/tmle3_Fit.R index 7d778a1d..abf72755 100644 --- a/R/tmle3_Fit.R +++ b/R/tmle3_Fit.R @@ -35,7 +35,7 @@ tmle3_Fit <- R6Class( initial_psi <- sapply( self$tmle_params, function(tmle_param) { - tmle_param$estimates(self$tmle_task)$psi + tmle_param$estimates(self$tmle_task, ifelse(updater$cvtmle, "validation", "full"))$psi } ) private$.initial_psi <- unlist(initial_psi) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index e46ae5e9..78eeebe7 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -16,7 +16,7 @@ tmle3_Spec_spCausalGLM <- R6Class( variable_types = NULL, ...) { estimand <- match.arg(estimand) private$.options <- list( - estimand = estimand, formula = formula, binary_outcome = binary_outcome, + estimand = estimand, formula = formula, binary_outcome = binary_outcome, treatment_level = treatment_level, control_level = control_level, append_interaction_matrix = append_interaction_matrix, wrap_in_Lrnr_glm_sp = wrap_in_Lrnr_glm_sp, likelihood_override = likelihood_override, From e094a7604ef47390bca62cdef6ad4f09426cf8cb Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 20:38:27 -0700 Subject: [PATCH 42/65] fix --- R/Param_npCATE.R | 6 ++++-- R/Param_npCATT.R | 8 +++++--- R/Param_npOR.R | 9 +++++++-- R/Param_spCATE.R | 6 ++++-- R/Param_spOR.R | 9 +++++++-- R/Param_spRR.R | 9 +++++++-- 6 files changed, 34 insertions(+), 13 deletions(-) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index d3a5aefa..49255f16 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -57,6 +57,7 @@ Param_npCATE <- R6Class( training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { @@ -166,7 +167,7 @@ Param_npCATE <- R6Class( ), active = list( name = function() { - param_form <- sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- private$.formula_names#sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) return(param_form) }, cf_likelihood_treatment = function() { @@ -194,6 +195,7 @@ Param_npCATE <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_CATE = NULL, - .submodel = list(Y = "gaussian_identity") + .submodel = list(Y = "gaussian_identity"), + .formula_names = NULL ) ) diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index 0ae93541..a01f5897 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -53,7 +53,8 @@ Param_npCATT <- R6Class( super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") - V <- model.matrix(formula_CATT, as.data.frame(W)) + V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { @@ -168,7 +169,7 @@ Param_npCATT <- R6Class( ), active = list( name = function() { - param_form <- sprintf("CATT[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- private$.formula_names#sprintf("CATT[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) return(param_form) }, cf_likelihood_treatment = function() { @@ -196,6 +197,7 @@ Param_npCATT <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_CATT = NULL, - .submodel = list(Y = "gaussian_identity") + .submodel = list(Y = "gaussian_identity"), + .formula_names = NULL ) ) diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 31d47594..6f21cb5d 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -53,6 +53,10 @@ Param_npOR <- R6Class( public = list( initialize = function(observed_likelihood, formula_logOR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.formula_names <- colnames(V) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] @@ -169,7 +173,7 @@ Param_npOR <- R6Class( ), active = list( name = function() { - param_form <- sprintf("logOR(Y,A|W)") + param_form <- private$.formula_names#sprintf("logOR(Y,A|W)") return(param_form) }, cf_likelihood_treatment = function() { @@ -197,6 +201,7 @@ Param_npOR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logOR = NULL, - .submodel = list(Y = "gaussian_identity") + .submodel = list(Y = "gaussian_identity"), + .formula_names = NULL ) ) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index 3c0a50f3..746246bc 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -54,6 +54,7 @@ Param_spCATE <- R6Class( training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { @@ -171,7 +172,7 @@ Param_spCATE <- R6Class( ), active = list( name = function() { - param_form <- sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- sprintf("%s", private$.formula_names) return(param_form) }, cf_likelihood_treatment = function() { @@ -199,6 +200,7 @@ Param_spCATE <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_CATE = NULL, - .submodel = list(Y = "gaussian_identity") + .submodel = list(Y = "gaussian_identity"), + .formula_names = NULL ) ) diff --git a/R/Param_spOR.R b/R/Param_spOR.R index c477e552..38daa55f 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -51,6 +51,10 @@ Param_spOR <- R6Class( public = list( initialize = function(observed_likelihood, formula_logOR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.formula_names <- colnames(V) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] @@ -150,7 +154,7 @@ Param_spOR <- R6Class( ), active = list( name = function() { - param_form <- sprintf("logOR(Y,A|W)") + param_form <- .formula_names#sprintf("logOR(Y,A|W)") return(param_form) }, cf_likelihood_treatment = function() { @@ -178,6 +182,7 @@ Param_spOR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logOR = NULL, - .submodel = list(Y = "gaussian_identity") + .submodel = list(Y = "gaussian_identity"), + .formula_names = NULL ) ) diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 9d4f685a..797ccaac 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -52,6 +52,10 @@ Param_spRR <- R6Class( public = list( initialize = function(observed_likelihood, formula_logRR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_CATE, as.data.frame(W)) + private$.formula_names <- colnames(V) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] @@ -155,7 +159,7 @@ Param_spRR <- R6Class( ), active = list( name = function() { - param_form <- sprintf("log(E[Y|A=1,W]/E[Y|A=0,W])") + param_form <- private$.formula_names#sprintf("log(E[Y|A=1,W]/E[Y|A=0,W])") return(param_form) }, cf_likelihood_treatment = function() { @@ -183,6 +187,7 @@ Param_spRR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logRR = NULL, - .submodel = list(Y = "poisson_log") + .submodel = list(Y = "poisson_log"), + .formula_names = NULL ) ) From 701b5a52d2d542fd313570a78e691fd7fc0ae63c Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 20:41:37 -0700 Subject: [PATCH 43/65] fix --- R/Param_npOR.R | 2 +- R/Param_spOR.R | 4 ++-- R/Param_spRR.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 6f21cb5d..d53f6ded 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -55,7 +55,7 @@ Param_npOR <- R6Class( super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") - V <- model.matrix(formula_CATE, as.data.frame(W)) + V <- model.matrix(formula_logOR, as.data.frame(W)) private$.formula_names <- colnames(V) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists diff --git a/R/Param_spOR.R b/R/Param_spOR.R index 38daa55f..b8d33a81 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -53,7 +53,7 @@ Param_spOR <- R6Class( super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") - V <- model.matrix(formula_CATE, as.data.frame(W)) + V <- model.matrix(formula_logOR, as.data.frame(W)) private$.formula_names <- colnames(V) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -154,7 +154,7 @@ Param_spOR <- R6Class( ), active = list( name = function() { - param_form <- .formula_names#sprintf("logOR(Y,A|W)") + param_form <- private$.formula_names#sprintf("logOR(Y,A|W)") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 797ccaac..82182fe5 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -54,7 +54,7 @@ Param_spRR <- R6Class( super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") - V <- model.matrix(formula_CATE, as.data.frame(W)) + V <- model.matrix(formula_logRR, as.data.frame(W)) private$.formula_names <- colnames(V) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists From 2fff29bef0d00ea444f5dcd9d9b6e28588fdbad2 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 21:31:09 -0700 Subject: [PATCH 44/65] fix --- R/tmle3_spec_spCausalGLM.R | 10 +++++----- vignettes/testing.Rmd | 19 ++++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 78eeebe7..37cd1bf3 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -13,11 +13,11 @@ tmle3_Spec_spCausalGLM <- R6Class( public = list( initialize = function(formula, estimand = c("CATE", "OR", "RR"), binary_outcome = FALSE, treatment_level = 1, control_level = 0, append_interaction_matrix = !(binary_outcome && estimand %in% c("CATE")), wrap_in_Lrnr_glm_sp = TRUE, likelihood_override = NULL, - variable_types = NULL, ...) { + variable_types = NULL, delta_epsilon = 0.1, ...) { estimand <- match.arg(estimand) private$.options <- list( estimand = estimand, formula = formula, binary_outcome = binary_outcome, - treatment_level = treatment_level, control_level = control_level, + treatment_level = treatment_level, control_level = control_level, delta_epsilon= delta_epsilon, append_interaction_matrix = append_interaction_matrix, wrap_in_Lrnr_glm_sp = wrap_in_Lrnr_glm_sp, likelihood_override = likelihood_override, variable_types = variable_types, ... @@ -53,13 +53,13 @@ tmle3_Spec_spCausalGLM <- R6Class( return(likelihood) }, - make_updater = function(convergence_type = "sample_size", verbose = F, ...) { + make_updater = function(convergence_type = "sample_size", verbose = T, ...) { if (self$options$estimand == "CATE") { updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = 0.0025, ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = self$options$delta_epsilon, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = self$options$delta_epsilon, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) } return(updater) }, diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 27289b9e..ca384c44 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -177,20 +177,21 @@ print(rowMeans(passes1)) library(sl3) passes <- c() -for(i in 1:200){ +for(i in 1:1){ print(i) -n <- 500 -W <- runif(n, -1, 1) -A <- rbinom(n, size = 1, prob = plogis(W)) -Y <- rpois(n, exp(A + A*W + W)) +n <- 200 +W <- as.matrix(replicate(500, runif(n, -1, 1))) +colnames(W) <- paste0("W", 1:ncol(W)) +A <- rbinom(n, size = 1, prob = plogis(W[,1])) +Y <- rpois(n, exp(A + A*W[,1] + W[,1])) data <- data.table(W,A,Y) data -lrnr_Y0W <- Lrnr_gam$new(family = poisson()) -lrnr_A <- Lrnr_gam$new() +lrnr_Y0W <- Lrnr_glmnet$new(family = "poisson") +lrnr_A <- Lrnr_glmnet$new() -node_list <- list (W = "W", A = "A", Y= "Y") +node_list <- list (W = colnames(W), A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) -spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "RR") +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W1 + W2 + W3, "RR") out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) From 2244687bfa150f47a0b0d4aeb0ab1db4d3fe01a2 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 21:40:21 -0700 Subject: [PATCH 45/65] fix --- R/Param_spRR.R | 6 +++--- R/tmle3_spec_spCausalGLM.R | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 82182fe5..46eb87cc 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -148,12 +148,12 @@ Param_spRR <- R6Class( Q0 <- pmax(Q0, 0.0005) Q1 <- pmax(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logRR, Q1, Q0, family = poisson(), weights = weights) - V <- model.matrix(self$formula_logRR, as.data.frame(W)) - RR <- as.vector(exp(V %*% beta)) + #V <- model.matrix(self$formula_logRR, as.data.frame(W)) + #RR <- as.vector(exp(V %*% beta)) IC <- as.matrix(EIF) - result <- list(psi = beta, IC = IC, RR = RR, transform = exp) + result <- list(psi = beta, IC = IC, transform = exp) return(result) } ), diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 37cd1bf3..5bbfbc11 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -54,6 +54,9 @@ tmle3_Spec_spCausalGLM <- R6Class( return(likelihood) }, make_updater = function(convergence_type = "sample_size", verbose = T, ...) { + if(!is.null(self$options$verbose)) { + verbose <- self$options$verbose + } if (self$options$estimand == "CATE") { updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR") { From 59a9301d2747948ad0a801675fc9c405f2e34eb3 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 21:44:59 -0700 Subject: [PATCH 46/65] fix --- R/Param_npCATE.R | 2 +- R/Param_npCATT.R | 2 +- R/Param_npOR.R | 2 +- R/Param_spOR.R | 2 +- R/Param_spRR.R | 8 ++++---- R/tmle3_Spec_npCausalGLM.R | 5 ++++- R/tmle3_spec_spCausalGLM.R | 6 +++--- vignettes/testing.Rmd | 4 ++-- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 49255f16..f2e786c8 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -167,7 +167,7 @@ Param_npCATE <- R6Class( ), active = list( name = function() { - param_form <- private$.formula_names#sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- private$.formula_names # sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index a01f5897..f865f7dc 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -169,7 +169,7 @@ Param_npCATT <- R6Class( ), active = list( name = function() { - param_form <- private$.formula_names#sprintf("CATT[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + param_form <- private$.formula_names # sprintf("CATT[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_npOR.R b/R/Param_npOR.R index d53f6ded..64195127 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -173,7 +173,7 @@ Param_npOR <- R6Class( ), active = list( name = function() { - param_form <- private$.formula_names#sprintf("logOR(Y,A|W)") + param_form <- private$.formula_names # sprintf("logOR(Y,A|W)") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spOR.R b/R/Param_spOR.R index b8d33a81..ef951b41 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -154,7 +154,7 @@ Param_spOR <- R6Class( ), active = list( name = function() { - param_form <- private$.formula_names#sprintf("logOR(Y,A|W)") + param_form <- private$.formula_names # sprintf("logOR(Y,A|W)") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 46eb87cc..53b56061 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -148,18 +148,18 @@ Param_spRR <- R6Class( Q0 <- pmax(Q0, 0.0005) Q1 <- pmax(Q1, 0.0005) beta <- get_beta(W, A, self$formula_logRR, Q1, Q0, family = poisson(), weights = weights) - #V <- model.matrix(self$formula_logRR, as.data.frame(W)) - #RR <- as.vector(exp(V %*% beta)) + # V <- model.matrix(self$formula_logRR, as.data.frame(W)) + # RR <- as.vector(exp(V %*% beta)) IC <- as.matrix(EIF) - result <- list(psi = beta, IC = IC, transform = exp) + result <- list(psi = beta, IC = IC, transform = exp) return(result) } ), active = list( name = function() { - param_form <- private$.formula_names#sprintf("log(E[Y|A=1,W]/E[Y|A=0,W])") + param_form <- private$.formula_names # sprintf("log(E[Y|A=1,W]/E[Y|A=0,W])") return(param_form) }, cf_likelihood_treatment = function() { diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 0d05bb43..db81bc2d 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -46,7 +46,10 @@ tmle3_Spec_npCausalGLM <- R6Class( return(likelihood) }, - make_updater = function(convergence_type = "sample_size", verbose = F, ...) { + make_updater = function(convergence_type = "sample_size", verbose = TRUE, ...) { + if (!is.null(self$options$verbose)) { + verbose <- self$options$verbose + } if (self$options$estimand == "CATE" || self$options$estimand == "CATT") { updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR") { diff --git a/R/tmle3_spec_spCausalGLM.R b/R/tmle3_spec_spCausalGLM.R index 5bbfbc11..92d75587 100644 --- a/R/tmle3_spec_spCausalGLM.R +++ b/R/tmle3_spec_spCausalGLM.R @@ -17,7 +17,7 @@ tmle3_Spec_spCausalGLM <- R6Class( estimand <- match.arg(estimand) private$.options <- list( estimand = estimand, formula = formula, binary_outcome = binary_outcome, - treatment_level = treatment_level, control_level = control_level, delta_epsilon= delta_epsilon, + treatment_level = treatment_level, control_level = control_level, delta_epsilon = delta_epsilon, append_interaction_matrix = append_interaction_matrix, wrap_in_Lrnr_glm_sp = wrap_in_Lrnr_glm_sp, likelihood_override = likelihood_override, variable_types = variable_types, ... @@ -53,8 +53,8 @@ tmle3_Spec_spCausalGLM <- R6Class( return(likelihood) }, - make_updater = function(convergence_type = "sample_size", verbose = T, ...) { - if(!is.null(self$options$verbose)) { + make_updater = function(convergence_type = "sample_size", verbose = TRUE, ...) { + if (!is.null(self$options$verbose)) { verbose <- self$options$verbose } if (self$options$estimand == "CATE") { diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index ca384c44..713eaf5c 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -179,8 +179,8 @@ library(sl3) passes <- c() for(i in 1:1){ print(i) -n <- 200 -W <- as.matrix(replicate(500, runif(n, -1, 1))) +n <- 1000 +W <- as.matrix(replicate(1000, runif(n, -1, 1))) colnames(W) <- paste0("W", 1:ncol(W)) A <- rbinom(n, size = 1, prob = plogis(W[,1])) Y <- rpois(n, exp(A + A*W[,1] + W[,1])) From 980849301a67dfafff3a2fcb16c5b0e0031f7f48 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Mon, 6 Sep 2021 23:46:42 -0700 Subject: [PATCH 47/65] add npTSM --- NAMESPACE | 1 + R/Param_npTSM.R | 178 +++++++++++++++++++++++++++++++++++++ R/tmle3_Fit.R | 4 +- R/tmle3_Spec_npCausalGLM.R | 19 ++-- R/utils.R | 1 + man/Param_ATC.Rd | 1 + man/Param_ATE.Rd | 1 + man/Param_ATT.Rd | 1 + man/Param_MSM.Rd | 1 + man/Param_TSM.Rd | 1 + man/Param_base.Rd | 1 + man/Param_delta.Rd | 1 + man/Param_mean.Rd | 1 + man/Param_npCATE.Rd | 1 + man/Param_npCATT.Rd | 1 + man/Param_npOR.Rd | 1 + man/Param_npTSM.Rd | 75 ++++++++++++++++ man/Param_spCATE.Rd | 1 + man/Param_spOR.Rd | 1 + man/Param_spRR.Rd | 1 + man/Param_stratified.Rd | 1 + man/Param_survival.Rd | 1 + man/define_param.Rd | 1 + man/tmle3_Fit.Rd | 1 + vignettes/testing.Rmd | 21 +++++ 25 files changed, 310 insertions(+), 7 deletions(-) create mode 100644 R/Param_npTSM.R create mode 100644 man/Param_npTSM.Rd diff --git a/NAMESPACE b/NAMESPACE index 58ad6ccf..f89ec465 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(Param_mean) export(Param_npCATE) export(Param_npCATT) export(Param_npOR) +export(Param_npTSM) export(Param_spCATE) export(Param_spOR) export(Param_spRR) diff --git a/R/Param_npTSM.R b/R/Param_npTSM.R new file mode 100644 index 00000000..374bda3c --- /dev/null +++ b/R/Param_npTSM.R @@ -0,0 +1,178 @@ +#' Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +#' The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +#' Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +#' This can be used to assess heterogeneity of the average treatment effect. +#' We note that `formula_TSM = ~ 1` gives an estimator of the nonparametric average treatment effect (ATE). +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{formula_TSM}}{... +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_npTSM <- R6Class( + classname = "Param_npTSM", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_TSM = ~1, intervention_list, outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_TSM, as.data.frame(W)) + private$.formula_names <- colnames(V) + private$.targeted <- rep(T, ncol(V)) + + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list <- c(intervention_list, censoring_intervention) + } + private$.formula_TSM <- formula_TSM + private$.cf_likelihood <- CF_Likelihood$new(observed_likelihood, intervention_list) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + + + cf_task1 <- self$cf_likelihood$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_TSM, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + W_train <- training_task$get_tmle_node("W") + V_train <- model.matrix(self$formula_TSM, as.data.frame(W_train)) + A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] + Y_train <- training_task$get_tmle_node("Y", format = TRUE)[[1]] + + intervention_nodes <- names(self$intervention_list) + pA <- self$observed_likelihood$get_likelihoods(tmle_task, intervention_nodes, fold_number) + cf_pA <- self$cf_likelihood$get_likelihoods(tmle_task, intervention_nodes, fold_number) + + if (!is.null(ncol(pA)) && ncol(pA) > 1) { + pA <- apply(pA, 1, prod) + } + if (!is.null(ncol(cf_pA)) && ncol(cf_pA) > 1) { + cf_pA <- apply(cf_pA, 1, prod) + } + + Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood$get_likelihoods(cf_task1, "Y", fold_number)) + + beta1 <- coef(glm.fit(as.matrix(V_train), Q1, family = gaussian(), weights = self$weights, intercept = F)) + Q1beta <- as.vector(V %*% beta1) + + H <- V * (cf_pA / pA) + + EIF_Y <- NULL + # Store EIF component + if (is_training_task) { + scale <- apply(V, 2, function(v) { + apply(self$weights * V * (v), 2, mean) + }) + + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) + EIF_WA <- + apply(V, 2, function(v) { + self$weights * (v * (Q1 - Q1beta) - mean(self$weights * (Q1 - Q1beta))) + }) %*% scaleinv + } + + + return(list(Y = H, EIF = list(Y = EIF_Y, WA = EIF_WA))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_TSM, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + + weights <- tmle_task$weights + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIF <- EIF$Y + EIF$WA + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q1 <- self$cf_likelihood$get_likelihoods(cf_task1, "Y", fold_number) + beta1 <- coef(glm.fit(V, Q1, family = gaussian(), weights = weights, intercept = F)) + + IC <- as.matrix(EIF) + + result <- list(psi = beta1, IC = IC) + return(result) + } + ), + active = list( + name = function() { + param_form <- unlist(paste0(sprintf("E[%s_{%s}]: ", self$outcome_node, self$cf_likelihood$name), private$.formula_names)) # sprintf("CATE[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood = function() { + return(private$.cf_likelihood) + }, + intervention_list = function() { + return(self$cf_likelihood$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_TSM = function() { + return(private$.formula_TSM) + } + ), + private = list( + .type = "CATE", + .cf_likelihood = NULL, + .supports_outcome_censoring = TRUE, + .formula_TSM = NULL, + .submodel = list(Y = "gaussian_identity"), + .formula_names = NULL + ) +) diff --git a/R/tmle3_Fit.R b/R/tmle3_Fit.R index abf72755..211f0815 100644 --- a/R/tmle3_Fit.R +++ b/R/tmle3_Fit.R @@ -38,7 +38,7 @@ tmle3_Fit <- R6Class( tmle_param$estimates(self$tmle_task, ifelse(updater$cvtmle, "validation", "full"))$psi } ) - private$.initial_psi <- unlist(initial_psi) + private$.initial_psi <- as.vector(unlist(initial_psi)) private$.tmle_fit(max_it) }, print = function() { @@ -68,7 +68,7 @@ tmle3_Fit <- R6Class( }, tmle_param_names = function() { if (is.null(private$.tmle_param_names)) { - private$.tmle_param_names <- unlist(sapply(self$tmle_params, `[[`, "name")) + private$.tmle_param_names <- as.vector(unlist(sapply(self$tmle_params, `[[`, "name"))) } return(private$.tmle_param_names) }, diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index db81bc2d..9cddce29 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -11,7 +11,7 @@ tmle3_Spec_npCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "CATT", "OR", "RR"), treatment_level = 1, control_level = 0, + initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR"), treatment_level = 1, control_level = 0, likelihood_override = NULL, variable_types = NULL, ...) { estimand <- match.arg(estimand) @@ -50,7 +50,7 @@ tmle3_Spec_npCausalGLM <- R6Class( if (!is.null(self$options$verbose)) { verbose <- self$options$verbose } - if (self$options$estimand == "CATE" || self$options$estimand == "CATT") { + if (self$options$estimand == "CATE" || self$options$estimand == "CATT" || self$options$estimand == "TSM") { updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR") { updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = 0.0025, ...) @@ -66,14 +66,23 @@ tmle3_Spec_npCausalGLM <- R6Class( make_params = function(tmle_task, targeted_likelihood) { treatment_value <- self$options$treatment_level control_value <- self$options$control_level + formula <- self$options$formula A_levels <- tmle_task$npsem[["A"]]$variable_type$levels if (!is.null(A_levels)) { treatment_value <- factor(treatment_value, levels = A_levels) control_value <- factor(control_value, levels = A_levels) } - treatment <- define_lf(LF_static, "A", value = treatment_value) - control <- define_lf(LF_static, "A", value = control_value) - formula <- self$options$formula + if (self$options$estimand == "TSM") { + # If TSM generate params for all levels + param <- lapply(union(treatment_value, control_value), function(value) { + treatment <- define_lf(LF_static, "A", value = value) + return(Param_npTSM$new(targeted_likelihood, formula, treatment)) + }) + return(param) + } else { + treatment <- define_lf(LF_static, "A", value = treatment_value) + control <- define_lf(LF_static, "A", value = control_value) + } if (self$options$estimand == "CATE") { param <- Param_npCATE$new(targeted_likelihood, formula, treatment, control) } else if (self$options$estimand == "CATT") { diff --git a/R/utils.R b/R/utils.R index 3a054ce2..ecd1eaef 100644 --- a/R/utils.R +++ b/R/utils.R @@ -90,6 +90,7 @@ summary_from_estimates <- function(task, estimates, param_types = NULL, psi_transformed <- mapply(apply_transform, psi, transforms[index_vec]) ci_transformed <- mapply(apply_transform, ci, transforms[index_vec]) ci_transformed <- matrix(ci_transformed, nrow = nrow(ci), ncol = ncol(ci)) + summary_dt <- as.data.table(list( param_types[index_vec], param_names, init_psi, psi, se, ci, diff --git a/man/Param_ATC.Rd b/man/Param_ATC.Rd index 20818d61..62daf611 100644 --- a/man/Param_ATC.Rd +++ b/man/Param_ATC.Rd @@ -66,6 +66,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_ATE.Rd b/man/Param_ATE.Rd index d4451ac2..85ec10c8 100644 --- a/man/Param_ATE.Rd +++ b/man/Param_ATE.Rd @@ -56,6 +56,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_ATT.Rd b/man/Param_ATT.Rd index ef9c26a9..3d4d010b 100644 --- a/man/Param_ATT.Rd +++ b/man/Param_ATT.Rd @@ -66,6 +66,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_MSM.Rd b/man/Param_MSM.Rd index a6f915b1..2e0dcbe2 100644 --- a/man/Param_MSM.Rd +++ b/man/Param_MSM.Rd @@ -64,6 +64,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_TSM.Rd b/man/Param_TSM.Rd index f82946bb..79d876dd 100644 --- a/man/Param_TSM.Rd +++ b/man/Param_TSM.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_base.Rd b/man/Param_base.Rd index 9a03c7b0..bbd20cd9 100644 --- a/man/Param_base.Rd +++ b/man/Param_base.Rd @@ -74,6 +74,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_delta.Rd b/man/Param_delta.Rd index a1c991d6..d3af4cc2 100644 --- a/man/Param_delta.Rd +++ b/man/Param_delta.Rd @@ -21,6 +21,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_mean.Rd b/man/Param_mean.Rd index 14906ca2..86574efa 100644 --- a/man/Param_mean.Rd +++ b/man/Param_mean.Rd @@ -49,6 +49,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_npCATE.Rd b/man/Param_npCATE.Rd index fbe47726..2215b372 100644 --- a/man/Param_npCATE.Rd +++ b/man/Param_npCATE.Rd @@ -62,6 +62,7 @@ Other Parameters: \code{\link{Param_mean}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_npCATT.Rd b/man/Param_npCATT.Rd index 2571d240..82a41ca9 100644 --- a/man/Param_npCATT.Rd +++ b/man/Param_npCATT.Rd @@ -64,6 +64,7 @@ Other Parameters: \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_npOR.Rd b/man/Param_npOR.Rd index a9f9ef15..b27c0751 100644 --- a/man/Param_npOR.Rd +++ b/man/Param_npOR.Rd @@ -68,6 +68,7 @@ Other Parameters: \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_npTSM.Rd b/man/Param_npTSM.Rd new file mode 100644 index 00000000..97dec51d --- /dev/null +++ b/man/Param_npTSM.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_npTSM.R +\name{Param_npTSM} +\alias{Param_npTSM} +\title{Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +This can be used to assess heterogeneity of the average treatment effect. +We note that \code{formula_TSM = ~ 1} gives an estimator of the nonparametric average treatment effect (ATE).} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{formula_TSM}}{... +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, +\code{\link{Param_npOR}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd index c01202e1..256de020 100644 --- a/man/Param_spCATE.Rd +++ b/man/Param_spCATE.Rd @@ -65,6 +65,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, \code{\link{Param_stratified}}, diff --git a/man/Param_spOR.Rd b/man/Param_spOR.Rd index e86a5b9d..c82bd926 100644 --- a/man/Param_spOR.Rd +++ b/man/Param_spOR.Rd @@ -65,6 +65,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spRR}}, \code{\link{Param_stratified}}, diff --git a/man/Param_spRR.Rd b/man/Param_spRR.Rd index eebf35c8..0d53b0e4 100644 --- a/man/Param_spRR.Rd +++ b/man/Param_spRR.Rd @@ -65,6 +65,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_stratified}}, diff --git a/man/Param_stratified.Rd b/man/Param_stratified.Rd index 98f559f0..7fee3a21 100644 --- a/man/Param_stratified.Rd +++ b/man/Param_stratified.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_survival.Rd b/man/Param_survival.Rd index 3a861b9d..e2a9c99a 100644 --- a/man/Param_survival.Rd +++ b/man/Param_survival.Rd @@ -51,6 +51,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/define_param.Rd b/man/define_param.Rd index ae912f7a..a29f0e70 100644 --- a/man/define_param.Rd +++ b/man/define_param.Rd @@ -27,6 +27,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/tmle3_Fit.Rd b/man/tmle3_Fit.Rd index 73fc0dd2..fb492ee6 100644 --- a/man/tmle3_Fit.Rd +++ b/man/tmle3_Fit.Rd @@ -108,6 +108,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 713eaf5c..33046e85 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -51,6 +51,27 @@ print(rowMeans(passes)) print(rowMeans(passes1)) print(rowMeans(passes2)) } + +``` + + +```{r} +n <- 500 +W <- runif(n, -1, 1) +A <- rbinom(n, size = 1, prob = plogis(W)) +Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) #rnorm(n, mean = A*W + A+W, sd = 0.3) +data <- data.table(W,A,Y) +lrnr_Y0W <- Lrnr_gam$new() +lrnr_A <- Lrnr_gam$new() + +node_list <- list (W = "W", A = "A", Y= "Y") +learner_list <- list(A = lrnr_A, Y = lrnr_Y0W ) + +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "TSM") +suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary +passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) +out ``` From 3f5b0f7077aeba318e62b71cdee67ada739452d8 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 09:23:55 -0700 Subject: [PATCH 48/65] npRR --- NAMESPACE | 1 + R/Param_npCATE.R | 2 +- R/Param_npRR.R | 202 +++++++++++++++++++++++++++++++++++++ R/Param_npTSM.R | 2 +- R/tmle3_Spec_npCausalGLM.R | 34 +++++-- R/tmle3_Update.R | 7 +- man/Param_ATC.Rd | 1 + man/Param_ATE.Rd | 1 + man/Param_ATT.Rd | 1 + man/Param_MSM.Rd | 1 + man/Param_TSM.Rd | 1 + man/Param_base.Rd | 1 + man/Param_delta.Rd | 1 + man/Param_mean.Rd | 1 + man/Param_npCATE.Rd | 1 + man/Param_npCATT.Rd | 1 + man/Param_npOR.Rd | 1 + man/Param_npRR.Rd | 76 ++++++++++++++ man/Param_npTSM.Rd | 1 + man/Param_spCATE.Rd | 1 + man/Param_spOR.Rd | 1 + man/Param_spRR.Rd | 1 + man/Param_stratified.Rd | 1 + man/Param_survival.Rd | 1 + man/define_param.Rd | 1 + man/tmle3_Fit.Rd | 1 + vignettes/testing.Rmd | 42 ++++++-- 27 files changed, 364 insertions(+), 21 deletions(-) create mode 100644 R/Param_npRR.R create mode 100644 man/Param_npRR.Rd diff --git a/NAMESPACE b/NAMESPACE index f89ec465..d512c9b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(Param_mean) export(Param_npCATE) export(Param_npCATT) export(Param_npOR) +export(Param_npRR) export(Param_npTSM) export(Param_spCATE) export(Param_spOR) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index f2e786c8..3e7cd0e1 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -117,7 +117,7 @@ Param_npCATE <- R6Class( scaleinv <- solve(scale) EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) EIF_WA <- apply(V, 2, function(v) { - self$weights * (v * (Q1 - Q0 - CATE) - mean(self$weights * (Q1 - Q0 - CATE))) + self$weights * (v * (Q1 - Q0 - CATE) - mean(v*self$weights * (Q1 - Q0 - CATE))) }) %*% scaleinv # print(dim(EIF_Y)) diff --git a/R/Param_npRR.R b/R/Param_npRR.R new file mode 100644 index 00000000..fe7106f7 --- /dev/null +++ b/R/Param_npRR.R @@ -0,0 +1,202 @@ +#' Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +#' The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +#' Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +#' This can be used to assess heterogeneity of the average treatment effect. +#' We note that `formula_RR = ~ 1` gives an estimator of the nonparametric average treatment effect (ATE). +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +#' } +#' \item{\code{formula_RR}}{... +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not currently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_npRR <- R6Class( + classname = "Param_npRR", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_RR = ~1, intervention_list_treatment, intervention_list_control, binary_outcome = FALSE,outcome_node = "Y") { + super$initialize(observed_likelihood, list(), outcome_node) + training_task <- self$observed_likelihood$training_task + W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") + V <- model.matrix(formula_RR, as.data.frame(W)) + private$.formula_names <- colnames(V) + private$.targeted <- rep(T, ncol(V)) + private$.binary_outcome <- binary_outcome + if(binary_outcome) { + private$.submodel = list(Y = "binomial_logit") + } else { + private$.submodel = list(Y = "poisson_log") + } + + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + private$.formula_RR <- formula_RR + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + + + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_tmle_node("W") + V <- model.matrix(self$formula_RR, as.data.frame(W)) + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Yf <- tmle_task$get_tmle_node("Y", format = T) + + Y <- tmle_task$get_tmle_node("Y", format = F) + + W_train <- training_task$get_tmle_node("W") + V_train <- model.matrix(self$formula_RR, as.data.frame(W_train)) + A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] + Y_train <- training_task$get_tmle_node("Y", format = F)[[1]] + + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 + + Q <- as.vector(self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number)) + Q0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number)) + Q1 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number)) + beta <- coef(glm.fit(V_train, Q1, offset = log(Q0), family = poisson(), weights = self$weights)) + RR <- as.vector(exp(V %*% beta)) + # var_Y <- self$cf_likelihood_treatment$get_likelihoods(tmle_task, "var_Y", fold_number) + # var_Y0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "var_Y", fold_number) + # var_Y1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "var_Y", fold_number) + + + H <- V * (A / g1 - (1 - A) * RR * (1 / g0)) + + EIF_Y <- NULL + # Store EIF component + if (is_training_task) { + scale <- apply(V, 2, function(v) { + apply(self$weights * V * (v) * RR * Q0, 2, mean) + }) + + scaleinv <- solve(scale) + EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) + EIF_WA <- apply(V, 2, function(v) { + self$weights * (v * (RR*Q0 - Q1) - mean(self$weights * v*(RR*Q0 - Q1))) + }) %*% scaleinv + + } + + + return(list(Y = H, EIF = list(Y = EIF_Y, WA = EIF_WA))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_tmle_node("W") + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y", format = F) + V <- model.matrix(self$formula_RR, as.data.frame(W)) + + weights <- tmle_task$weights + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIF <- EIF$Y + EIF$WA + Q <- self$observed_likelihood$get_likelihoods(tmle_task, "Y", fold_number) + Q0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "Y", fold_number) + Q1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "Y", fold_number) + + beta <- coef(glm.fit(V, Q1, offset = log(Q0), family = poisson(), weights = self$weights)) + + + RR <- exp(V %*% beta) + + IC <- as.matrix(EIF) + + result <- list(psi = beta, IC = IC, RR = RR) + return(result) + } + ), + active = list( + name = function() { + param_form <- private$.formula_names # sprintf("RR[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_RR = function() { + return(private$.formula_RR) + } + ), + private = list( + .type = "RR", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE, + .formula_RR = NULL, + .submodel = list(Y = "binomial_logit"), + .formula_names = NULL, + .binary_outcome = NULL + ) +) diff --git a/R/Param_npTSM.R b/R/Param_npTSM.R index 374bda3c..3ccd86a6 100644 --- a/R/Param_npTSM.R +++ b/R/Param_npTSM.R @@ -117,7 +117,7 @@ Param_npTSM <- R6Class( EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) EIF_WA <- apply(V, 2, function(v) { - self$weights * (v * (Q1 - Q1beta) - mean(self$weights * (Q1 - Q1beta))) + self$weights * (v * (Q1 - Q1beta) - mean(self$weights * v* (Q1 - Q1beta))) }) %*% scaleinv } diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 9cddce29..9e5c6297 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -11,13 +11,13 @@ tmle3_Spec_npCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR"), treatment_level = 1, control_level = 0, + initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR", "RR"), treatment_level = 1, control_level = 0, likelihood_override = NULL, - variable_types = NULL, ...) { + variable_types = NULL, delta_epsilon = 0.025, ...) { estimand <- match.arg(estimand) private$.options <- list( estimand = estimand, formula = formula, - treatment_level = treatment_level, control_level = control_level, + treatment_level = treatment_level, control_level = control_level, delta_epsilon = delta_epsilon, likelihood_override = likelihood_override, variable_types = variable_types, ... ) @@ -25,12 +25,22 @@ tmle3_Spec_npCausalGLM <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types include_variance_node <- FALSE + scale_outcome <- FALSE + binary_outcome <- all(data[[node_list$Y]] %in% c(0,1)) + private$.options$binary_outcome <- binary_outcome if (self$options$estimand == "RR") { - variable_types <- list(Y = variable_type("continuous")) + if(binary_outcome) { + type <- "binomial" + } else { + type <- "continuous" + } + variable_types <- list(Y = variable_type(type )) + #scale_outcome <- binary_outcome } else if (self$options$estimand == "OR") { variable_types <- list(Y = variable_type("binomial")) } - tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = FALSE, include_variance_node = include_variance_node) + + tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = scale_outcome, include_variance_node = include_variance_node) return(tmle_task) }, @@ -47,15 +57,21 @@ tmle3_Spec_npCausalGLM <- R6Class( return(likelihood) }, make_updater = function(convergence_type = "sample_size", verbose = TRUE, ...) { + delta_epsilon <- self$options$delta_epsilon if (!is.null(self$options$verbose)) { verbose <- self$options$verbose } if (self$options$estimand == "CATE" || self$options$estimand == "CATT" || self$options$estimand == "TSM") { - updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) + updater <- tmle3_Update$new(maxit = 100, one_dimensional = FALSE, delta_epsilon = 1, verbose = verbose, constrain_step = FALSE, bounds = c(-Inf, Inf), ...) } else if (self$options$estimand == "OR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = 0.0025, ...) + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = delta_epsilon, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR") { - updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = 0.0025, constrain_step = TRUE, bounds = c(0.0025, Inf), ...) + if(!self$options$binary_outcome) { + bounds <- list(Y = c(0.0025, Inf), A = 0.005) + } else { + bounds <- list(Y = 0.0025, A = 0.005) + } + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = delta_epsilon, constrain_step = TRUE, bounds = bounds, ...) } return(updater) }, @@ -90,7 +106,7 @@ tmle3_Spec_npCausalGLM <- R6Class( } else if (self$options$estimand == "OR") { param <- Param_npOR$new(targeted_likelihood, formula, treatment, control) } else if (self$options$estimand == "RR") { - param <- Param_npRR$new(targeted_likelihood, formula, treatment, control) + param <- Param_npRR$new(targeted_likelihood, formula, treatment, control, binary_outcome = self$options$binary_outcome) } return(list(param)) } diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 3d6cd588..a39db57a 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -146,7 +146,7 @@ tmle3_Update <- R6Class( ED <- colMeans(EIF_components) - EDnormed <- ED / norm(ED, type = "2") + EDnormed <- ED / norm(ED, type = "2") * sqrt(length(ED)) #Ensures step size generalizes to many parameters better if (length(EIF_components) == 0 || ncol(EIF_components) != ncol(covariates_dt)) { stop("Not all params provide EIF components") } @@ -155,7 +155,7 @@ tmle3_Update <- R6Class( ) if (is.null(EIF_components)) { ED <- ED_from_estimates(self$current_estimates) - EDnormed <- ED / norm(ED, type = "2") + EDnormed <- ED / norm(ED, type = "2") * sqrt(length(ED)) } # covariates_dt <- self$collapse_covariates(self$current_estimates, covariates_dt) } else { @@ -283,7 +283,8 @@ tmle3_Update <- R6Class( start = rep(0, ncol(submodel_data$H)) ) }) - } else if (self$fluctuation_type == "weighted") { + Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit) ) + } else if (self$fluctuation_type == "weighted") { if (self$one_dimensional) { suppressWarnings({ submodel_fit <- glm(observed ~ 1, submodel_data, diff --git a/man/Param_ATC.Rd b/man/Param_ATC.Rd index 62daf611..086335b5 100644 --- a/man/Param_ATC.Rd +++ b/man/Param_ATC.Rd @@ -66,6 +66,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_ATE.Rd b/man/Param_ATE.Rd index 85ec10c8..19beff27 100644 --- a/man/Param_ATE.Rd +++ b/man/Param_ATE.Rd @@ -56,6 +56,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_ATT.Rd b/man/Param_ATT.Rd index 3d4d010b..45aa0e50 100644 --- a/man/Param_ATT.Rd +++ b/man/Param_ATT.Rd @@ -66,6 +66,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_MSM.Rd b/man/Param_MSM.Rd index 2e0dcbe2..d1995b00 100644 --- a/man/Param_MSM.Rd +++ b/man/Param_MSM.Rd @@ -64,6 +64,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_TSM.Rd b/man/Param_TSM.Rd index 79d876dd..f730ecb8 100644 --- a/man/Param_TSM.Rd +++ b/man/Param_TSM.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_base.Rd b/man/Param_base.Rd index bbd20cd9..9ef4ff6f 100644 --- a/man/Param_base.Rd +++ b/man/Param_base.Rd @@ -74,6 +74,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_delta.Rd b/man/Param_delta.Rd index d3af4cc2..58f17aaa 100644 --- a/man/Param_delta.Rd +++ b/man/Param_delta.Rd @@ -21,6 +21,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_mean.Rd b/man/Param_mean.Rd index 86574efa..96dd7646 100644 --- a/man/Param_mean.Rd +++ b/man/Param_mean.Rd @@ -49,6 +49,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_npCATE.Rd b/man/Param_npCATE.Rd index 2215b372..7a433c9c 100644 --- a/man/Param_npCATE.Rd +++ b/man/Param_npCATE.Rd @@ -62,6 +62,7 @@ Other Parameters: \code{\link{Param_mean}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_npCATT.Rd b/man/Param_npCATT.Rd index 82a41ca9..fe69aca6 100644 --- a/man/Param_npCATT.Rd +++ b/man/Param_npCATT.Rd @@ -64,6 +64,7 @@ Other Parameters: \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_npOR.Rd b/man/Param_npOR.Rd index b27c0751..6647e4f9 100644 --- a/man/Param_npOR.Rd +++ b/man/Param_npOR.Rd @@ -68,6 +68,7 @@ Other Parameters: \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_npRR.Rd b/man/Param_npRR.Rd new file mode 100644 index 00000000..db06f3e8 --- /dev/null +++ b/man/Param_npRR.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_npRR.R +\name{Param_npRR} +\alias{Param_npRR} +\title{Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +This can be used to assess heterogeneity of the average treatment effect. +We note that \code{formula_RR = ~ 1} gives an estimator of the nonparametric average treatment effect (ATE).} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} corresponding to the observed likelihood +} +\item{\code{formula_RR}}{... +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not currently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, +\code{\link{Param_npOR}}, +\code{\link{Param_npTSM}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_npTSM.Rd b/man/Param_npTSM.Rd index 97dec51d..fdd237ff 100644 --- a/man/Param_npTSM.Rd +++ b/man/Param_npTSM.Rd @@ -63,6 +63,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd index 256de020..d8ac0942 100644 --- a/man/Param_spCATE.Rd +++ b/man/Param_spCATE.Rd @@ -65,6 +65,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spOR}}, \code{\link{Param_spRR}}, diff --git a/man/Param_spOR.Rd b/man/Param_spOR.Rd index c82bd926..d6bf15ef 100644 --- a/man/Param_spOR.Rd +++ b/man/Param_spOR.Rd @@ -65,6 +65,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spRR}}, diff --git a/man/Param_spRR.Rd b/man/Param_spRR.Rd index 0d53b0e4..b2c3fa4a 100644 --- a/man/Param_spRR.Rd +++ b/man/Param_spRR.Rd @@ -65,6 +65,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_stratified.Rd b/man/Param_stratified.Rd index 7fee3a21..8016b4ae 100644 --- a/man/Param_stratified.Rd +++ b/man/Param_stratified.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/Param_survival.Rd b/man/Param_survival.Rd index e2a9c99a..ef8d0b5f 100644 --- a/man/Param_survival.Rd +++ b/man/Param_survival.Rd @@ -51,6 +51,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/define_param.Rd b/man/define_param.Rd index a29f0e70..0fd40f66 100644 --- a/man/define_param.Rd +++ b/man/define_param.Rd @@ -27,6 +27,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/man/tmle3_Fit.Rd b/man/tmle3_Fit.Rd index fb492ee6..0a9e38b0 100644 --- a/man/tmle3_Fit.Rd +++ b/man/tmle3_Fit.Rd @@ -108,6 +108,7 @@ Other Parameters: \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, \code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, \code{\link{Param_npTSM}}, \code{\link{Param_spCATE}}, \code{\link{Param_spOR}}, diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 33046e85..3e2285d0 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -196,15 +196,17 @@ print(rowMeans(passes1)) ```{r} library(sl3) - passes <- c() -for(i in 1:1){ +passes1 <- c() +for(i in 1:100){ + + print(i) -n <- 1000 -W <- as.matrix(replicate(1000, runif(n, -1, 1))) +n <- 200 +W <- as.matrix(replicate(2, runif(n, -1, 1))) colnames(W) <- paste0("W", 1:ncol(W)) A <- rbinom(n, size = 1, prob = plogis(W[,1])) -Y <- rpois(n, exp(A + A*W[,1] + W[,1])) +Y <- rpois(n, exp(A + A*W[,1] + W[,1])) data <- data.table(W,A,Y) data lrnr_Y0W <- Lrnr_glmnet$new(family = "poisson") @@ -212,11 +214,37 @@ lrnr_A <- Lrnr_glmnet$new() node_list <- list (W = colnames(W), A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) -spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W1 + W2 + W3, "RR") +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1+W1 , "RR") out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary + out passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) -print(rowMeans(passes)) +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 , "RR", delta_epsilon = 0.01) +out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + out + passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) + + print(rowMeans(passes)) +print(rowMeans(passes1)) } + ``` + + +```{r} + + +out$tmle_task$npsem$Y$scale + + +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 , "RR", delta_epsilon =0.001) +out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + out + + +``` + + From 9c5509670f63760951f89827bb6c52911c55882a Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 10:44:24 -0700 Subject: [PATCH 49/65] bounded outcomes --- R/Param_npCATE.R | 9 ++++--- R/Param_npCATT.R | 9 ++++--- R/Param_npRR.R | 14 +++++----- R/Param_npTSM.R | 12 +++++---- R/submodels.R | 3 ++- R/tmle3_Spec_npCausalGLM.R | 53 ++++++++++++++++++++++++++++++++------ R/tmle3_Update.R | 8 +++++- vignettes/testing.Rmd | 14 ++++++++++ 8 files changed, 93 insertions(+), 29 deletions(-) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 3e7cd0e1..e0c3bf82 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -52,7 +52,7 @@ Param_npCATE <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c( "binomial", "gaussian", "poisson"), outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") @@ -60,6 +60,9 @@ Param_npCATE <- R6Class( private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) + family_fluctuation <- match.arg(family_fluctuation) + private$.submodel <- list(Y=family_fluctuation) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] @@ -85,7 +88,7 @@ Param_npCATE <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_CATE, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = T)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y") W_train <- training_task$get_tmle_node("W") V_train <- model.matrix(self$formula_CATE, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] @@ -136,7 +139,7 @@ Param_npCATE <- R6Class( W <- tmle_task$get_tmle_node("W") A <- tmle_task$get_tmle_node("A", format = T)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y") weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index f865f7dc..4723d35c 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -49,7 +49,7 @@ Param_npCATT <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATT = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATT = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c( "binomial", "gaussian", "poisson"), outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") @@ -57,6 +57,9 @@ Param_npCATT <- R6Class( private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) + family_fluctuation <- match.arg(family_fluctuation) + private$.submodel <- list(Y=family_fluctuation) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] @@ -82,7 +85,7 @@ Param_npCATT <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_CATT, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = T)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y") W_train <- training_task$get_tmle_node("W") V_train <- model.matrix(self$formula_CATT, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] @@ -138,7 +141,7 @@ Param_npCATT <- R6Class( W <- tmle_task$get_tmle_node("W") A <- tmle_task$get_tmle_node("A", format = T)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y") weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation diff --git a/R/Param_npRR.R b/R/Param_npRR.R index fe7106f7..ecc80dfc 100644 --- a/R/Param_npRR.R +++ b/R/Param_npRR.R @@ -52,19 +52,17 @@ Param_npRR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_RR = ~1, intervention_list_treatment, intervention_list_control, binary_outcome = FALSE,outcome_node = "Y") { + initialize = function(observed_likelihood, formula_RR = ~1, intervention_list_treatment, intervention_list_control, binary_outcome = FALSE, family_fluctuation = c("poisson", "binomial"),outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) + family_fluctuation <- match.arg(family_fluctuation) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_RR, as.data.frame(W)) private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) private$.binary_outcome <- binary_outcome - if(binary_outcome) { - private$.submodel = list(Y = "binomial_logit") - } else { - private$.submodel = list(Y = "poisson_log") - } + private$.submodel <- list(Y=family_fluctuation) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -91,14 +89,14 @@ Param_npRR <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_RR, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = T)[[1]] - Yf <- tmle_task$get_tmle_node("Y", format = T) + Y <- tmle_task$get_tmle_node("Y", format = F) W_train <- training_task$get_tmle_node("W") V_train <- model.matrix(self$formula_RR, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] - Y_train <- training_task$get_tmle_node("Y", format = F)[[1]] + Y_train <- training_task$get_tmle_node("Y", format = F) g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) g1 <- ifelse(A == 1, g, 1 - g) diff --git a/R/Param_npTSM.R b/R/Param_npTSM.R index 3ccd86a6..b8e8cf84 100644 --- a/R/Param_npTSM.R +++ b/R/Param_npTSM.R @@ -52,7 +52,9 @@ Param_npTSM <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_TSM = ~1, intervention_list, outcome_node = "Y") { + initialize = function(observed_likelihood, formula_TSM = ~1, intervention_list, family_fluctuation = c( "binomial", "gaussian", "poisson"), outcome_node = "Y") { + family_fluctuation <- match.arg(family_fluctuation) + private$.submodel <- list(Y=family_fluctuation) super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") @@ -81,7 +83,7 @@ Param_npTSM <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_TSM, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = T)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y") W_train <- training_task$get_tmle_node("W") V_train <- model.matrix(self$formula_TSM, as.data.frame(W_train)) A_train <- training_task$get_tmle_node("A", format = TRUE)[[1]] @@ -133,7 +135,7 @@ Param_npTSM <- R6Class( W <- tmle_task$get_tmle_node("W") V <- model.matrix(self$formula_TSM, as.data.frame(W)) A <- tmle_task$get_tmle_node("A", format = T)[[1]] - Y <- tmle_task$get_tmle_node("Y", format = T)[[1]] + Y <- tmle_task$get_tmle_node("Y") weights <- tmle_task$weights # clever_covariates happen here (for this param) only, but this is repeated computation @@ -168,11 +170,11 @@ Param_npTSM <- R6Class( } ), private = list( - .type = "CATE", + .type = "TSM", .cf_likelihood = NULL, .supports_outcome_censoring = TRUE, .formula_TSM = NULL, - .submodel = list(Y = "gaussian_identity"), + .submodel = list(Y = "binomial_identity"), .formula_names = NULL ) ) diff --git a/R/submodels.R b/R/submodels.R index d257d3a2..d5bf7f69 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -38,7 +38,8 @@ submodel_logistic_switch <- function(eps, offset, X, observed) { #' @param v ... #' @export loss_function_loglik_binomial <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { - loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) + #loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) + loss <- -1 * (observed * log(estimate) + (1-observed) * log(1-estimate)) if (!is.null(weights)) { loss <- weights * loss } diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 9e5c6297..eecabdcc 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -11,12 +11,12 @@ tmle3_Spec_npCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR", "RR"), treatment_level = 1, control_level = 0, + initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR", "RR"), treatment_level = 1, control_level = 0, family_fluctuation = NULL, likelihood_override = NULL, variable_types = NULL, delta_epsilon = 0.025, ...) { estimand <- match.arg(estimand) private$.options <- list( - estimand = estimand, formula = formula, + estimand = estimand, formula = formula, family_fluctuation = family_fluctuation, treatment_level = treatment_level, control_level = control_level, delta_epsilon = delta_epsilon, likelihood_override = likelihood_override, variable_types = variable_types, ... @@ -25,7 +25,37 @@ tmle3_Spec_npCausalGLM <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types include_variance_node <- FALSE - scale_outcome <- FALSE + scale_outcome <- TRUE + Y <- data[[node_list$Y]] + family <- self$options$family_fluctuation + + if(is.null(family) && self$options$estimand %in% c("CATE", "CATT", "TSM")) { + Y <- tmle_task$get_tmle_node("Y") + if(all(Y %in% c( 0,1))) { + family <- "binomial" + } else if (all(Y >=0)) { + family <- "poisson" + scale_outcome <- FALSE + } else { + family <- "gaussian" + scale_outcome <- FALSE + } + } else if (is.null(family) && self$options$estimand == "RR") { + Y <- tmle_task$get_tmle_node("Y") + if(all(Y %in% c( 0,1))) { + family <- "binomial" + } else { + family <- "poisson" + scale_outcome <- FALSE + } + } else if (!is.null(family)) { + if(family == "binomial") { + scale_outcome <- TRUE + } else{ + scale_outcome <- FALSE + } + } + private$.options$family_fluctuation <- family binary_outcome <- all(data[[node_list$Y]] %in% c(0,1)) private$.options$binary_outcome <- binary_outcome if (self$options$estimand == "RR") { @@ -66,11 +96,13 @@ tmle3_Spec_npCausalGLM <- R6Class( } else if (self$options$estimand == "OR") { updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = delta_epsilon, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR") { - if(!self$options$binary_outcome) { + if(self$options$family_fluctuation == "poisson") { bounds <- list(Y = c(0.0025, Inf), A = 0.005) } else { bounds <- list(Y = 0.0025, A = 0.005) } + + updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = delta_epsilon, constrain_step = TRUE, bounds = bounds, ...) } return(updater) @@ -83,30 +115,35 @@ tmle3_Spec_npCausalGLM <- R6Class( treatment_value <- self$options$treatment_level control_value <- self$options$control_level formula <- self$options$formula + family <- self$options$family_fluctuation A_levels <- tmle_task$npsem[["A"]]$variable_type$levels if (!is.null(A_levels)) { treatment_value <- factor(treatment_value, levels = A_levels) control_value <- factor(control_value, levels = A_levels) } + + + if (self$options$estimand == "TSM") { # If TSM generate params for all levels param <- lapply(union(treatment_value, control_value), function(value) { treatment <- define_lf(LF_static, "A", value = value) - return(Param_npTSM$new(targeted_likelihood, formula, treatment)) + return(Param_npTSM$new(targeted_likelihood, formula, treatment, family_fluctuation = family)) }) return(param) } else { treatment <- define_lf(LF_static, "A", value = treatment_value) control <- define_lf(LF_static, "A", value = control_value) } + if (self$options$estimand == "CATE") { - param <- Param_npCATE$new(targeted_likelihood, formula, treatment, control) + param <- Param_npCATE$new(targeted_likelihood, formula, treatment, control, family_fluctuation = family) } else if (self$options$estimand == "CATT") { - param <- Param_npCATT$new(targeted_likelihood, formula, treatment, control) + param <- Param_npCATT$new(targeted_likelihood, formula, treatment, control, family_fluctuation = family) } else if (self$options$estimand == "OR") { param <- Param_npOR$new(targeted_likelihood, formula, treatment, control) } else if (self$options$estimand == "RR") { - param <- Param_npRR$new(targeted_likelihood, formula, treatment, control, binary_outcome = self$options$binary_outcome) + param <- Param_npRR$new(targeted_likelihood, formula, treatment, control, binary_outcome = self$options$binary_outcome, family_fluctuation = family) } return(list(param)) } diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index a39db57a..a7d3f5e7 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -257,6 +257,9 @@ tmle3_Update <- R6Class( method = "Brent" ) epsilon <- optim_fit$par + #Qnew <- self$apply_submodel(submodel, submodel_data, epsilon) + #print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) + } else { epsilon <- self$delta_epsilon } @@ -283,7 +286,10 @@ tmle3_Update <- R6Class( start = rep(0, ncol(submodel_data$H)) ) }) - Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit) ) + #Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit)) + #print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) + + #Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit) ) } else if (self$fluctuation_type == "weighted") { if (self$one_dimensional) { suppressWarnings({ diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 3e2285d0..847dfc9f 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -233,6 +233,20 @@ print(rowMeans(passes1)) ``` +```{r} +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 , "RR", delta_epsilon = 0.01, family_fluctuation = "poisson") +out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + out + +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 , "RR", delta_epsilon = 10, family_fluctuation = "binomial") +out <- suppressWarnings(tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary + out + +``` + + ```{r} From d75be43c1a8fb48caa54afc0d6ca9a42d18a6a6b Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 10:56:00 -0700 Subject: [PATCH 50/65] bounded outcomes --- R/Param_npCATT.R | 2 +- R/tmle3_Spec_npCausalGLM.R | 3 ++- R/tmle3_Update.R | 1 + vignettes/testing.Rmd | 22 +++++++++++----------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index 4723d35c..5fd59d64 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -53,7 +53,7 @@ Param_npCATT <- R6Class( super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") - V <- model.matrix(formula_CATE, as.data.frame(W)) + V <- model.matrix(formula_CATT, as.data.frame(W)) private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index eecabdcc..8a7c1273 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -30,9 +30,10 @@ tmle3_Spec_npCausalGLM <- R6Class( family <- self$options$family_fluctuation if(is.null(family) && self$options$estimand %in% c("CATE", "CATT", "TSM")) { - Y <- tmle_task$get_tmle_node("Y") + Y <- data[[node_list$Y]] if(all(Y %in% c( 0,1))) { family <- "binomial" + scale_outcome <- FALSE } else if (all(Y >=0)) { family <- "poisson" scale_outcome <- FALSE diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index a7d3f5e7..849bc5bc 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -278,6 +278,7 @@ tmle3_Update <- R6Class( } } else { if (self$fluctuation_type == "standard") { + suppressWarnings({ submodel_fit <- glm(observed ~ H - 1, submodel_data, offset = family_object$linkfun(submodel_data$initial), diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 847dfc9f..a9ee75e1 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -15,16 +15,16 @@ passes <- c() passes1 <- c() passes2 <- c() -for(i in 1:1){ +#for(i in 1:1){ print(i) n <- 500 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) -Y <- rbinom(n, size = 1, prob = plogis(W)) #rnorm(n, mean = A*W + A+W, sd = 0.3) +Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_gam$new() -lrnr_A <- Lrnr_gam$new() +lrnr_Y0W <- Lrnr_glm$new() +lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W ) @@ -34,23 +34,23 @@ spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATE") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) +out - -spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATT") -suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATT", family = "binomial") + (out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) - +out spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "CATE") suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) - +out print(rowMeans(passes)) print(rowMeans(passes1)) print(rowMeans(passes2)) -} + ``` @@ -67,7 +67,7 @@ lrnr_A <- Lrnr_gam$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W ) -spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "TSM") +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "TSM" ) suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) out <- out$summary passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) From ed89de888c7e7782de691e9f84773b7482138b51 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 11:58:25 -0700 Subject: [PATCH 51/65] bounded outcomes --- R/tmle3_Spec_npCausalGLM.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 8a7c1273..4889826c 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -30,7 +30,7 @@ tmle3_Spec_npCausalGLM <- R6Class( family <- self$options$family_fluctuation if(is.null(family) && self$options$estimand %in% c("CATE", "CATT", "TSM")) { - Y <- data[[node_list$Y]] + if(all(Y %in% c( 0,1))) { family <- "binomial" scale_outcome <- FALSE @@ -42,7 +42,7 @@ tmle3_Spec_npCausalGLM <- R6Class( scale_outcome <- FALSE } } else if (is.null(family) && self$options$estimand == "RR") { - Y <- tmle_task$get_tmle_node("Y") + if(all(Y %in% c( 0,1))) { family <- "binomial" } else { @@ -72,7 +72,8 @@ tmle3_Spec_npCausalGLM <- R6Class( } tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = scale_outcome, include_variance_node = include_variance_node) - + print(tmle_task) + print(tmle_task$data) return(tmle_task) }, make_initial_likelihood = function(tmle_task, learner_list = NULL) { From 469e7f8a9e5d670f9842512971d7fcc554b4b04e Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 12:35:12 -0700 Subject: [PATCH 52/65] bounded outcomes --- NAMESPACE | 103 ------------------------------------- R/tmle3_Spec_npCausalGLM.R | 3 +- 2 files changed, 1 insertion(+), 105 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d512c9b3..2de6bdc2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,108 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method(plot,tmle3_Fit) -export(CF_Likelihood) -export(LF_base) -export(LF_derived) -export(LF_emp) -export(LF_fit) -export(LF_known) -export(LF_static) -export(LF_targeted) -export(Likelihood) -export(Likelihood_cache) -export(Lrnr_glm_semiparametric) -export(Param_ATC) -export(Param_ATE) -export(Param_ATT) -export(Param_MSM) -export(Param_TSM) -export(Param_base) -export(Param_delta) -export(Param_mean) -export(Param_npCATE) -export(Param_npCATT) -export(Param_npOR) -export(Param_npRR) -export(Param_npTSM) -export(Param_spCATE) -export(Param_spOR) -export(Param_spRR) -export(Param_stratified) -export(Param_survival) -export(Targeted_Likelihood) -export(all_ancestors) -export(bound) -export(define_lf) -export(define_node) -export(define_param) -export(delta_param_ATE) -export(delta_param_OR) -export(delta_param_PAF) -export(delta_param_PAR) -export(delta_param_RR) -export(density_formula) -export(discretize_variable) -export(fit_tmle3) -export(generate_loss_function_from_family) -export(generate_submodel_from_family) -export(get_propensity_scores) -export(get_submodel_spec) -export(loss_function_least_squares) -export(loss_function_loglik) -export(loss_function_loglik_binomial) -export(loss_function_poisson) -export(make_CF_Likelihood) -export(make_Likelihood) -export(make_submodel_spec) -export(make_tmle3_Task) -export(plot_vim) -export(point_tx_likelihood) -export(point_tx_npsem) -export(point_tx_task) -export(process_missing) -export(propensity_score_plot) -export(propensity_score_table) -export(submodel_exp) -export(submodel_linear) -export(submodel_logistic_switch) -export(submodel_spec_logistic_switch) -export(summary_from_estimates) -export(survival_tx_likelihood) -export(survival_tx_npsem) -export(survival_tx_task) -export(time_ordering) -export(tmle3) -export(tmle3_Fit) -export(tmle3_Node) -export(tmle3_Spec) -export(tmle3_Spec_ATC) -export(tmle3_Spec_ATE) -export(tmle3_Spec_ATT) -export(tmle3_Spec_MSM) -export(tmle3_Spec_OR) -export(tmle3_Spec_PAR) -export(tmle3_Spec_RR) -export(tmle3_Spec_TSM_all) -export(tmle3_Spec_npCausalGLM) -export(tmle3_Spec_spCausalGLM) -export(tmle3_Spec_stratified) -export(tmle3_Spec_survival) -export(tmle3_Task) -export(tmle3_Update) -export(tmle3_Update_survival) -export(tmle3_vim) -export(tmle_ATC) -export(tmle_ATE) -export(tmle_ATT) -export(tmle_MSM) -export(tmle_OR) -export(tmle_PAR) -export(tmle_RR) -export(tmle_TSM_all) -export(tmle_stratified) -export(tmle_survival) -export(train_lf) import(data.table) import(ggplot2) importFrom(R6,R6Class) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 4889826c..ca686144 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -72,8 +72,7 @@ tmle3_Spec_npCausalGLM <- R6Class( } tmle_task <- point_tx_task(data, node_list, variable_types, scale_outcome = scale_outcome, include_variance_node = include_variance_node) - print(tmle_task) - print(tmle_task$data) + return(tmle_task) }, make_initial_likelihood = function(tmle_task, learner_list = NULL) { From 4d4fb2f4474c983ad0d498cf0bf413adcc1ae749 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 13:11:07 -0700 Subject: [PATCH 53/65] plz --- NAMESPACE | 103 +++++++++++++++++++++++++++++++++++++ R/Param_npCATE.R | 6 +-- R/Param_npCATT.R | 4 +- R/Param_npRR.R | 7 ++- R/Param_npTSM.R | 6 +-- R/submodels.R | 4 +- R/tmle3_Spec_npCausalGLM.R | 26 +++++----- R/tmle3_Update.R | 18 +++---- 8 files changed, 136 insertions(+), 38 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2de6bdc2..d512c9b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,108 @@ # Generated by roxygen2: do not edit by hand +S3method(plot,tmle3_Fit) +export(CF_Likelihood) +export(LF_base) +export(LF_derived) +export(LF_emp) +export(LF_fit) +export(LF_known) +export(LF_static) +export(LF_targeted) +export(Likelihood) +export(Likelihood_cache) +export(Lrnr_glm_semiparametric) +export(Param_ATC) +export(Param_ATE) +export(Param_ATT) +export(Param_MSM) +export(Param_TSM) +export(Param_base) +export(Param_delta) +export(Param_mean) +export(Param_npCATE) +export(Param_npCATT) +export(Param_npOR) +export(Param_npRR) +export(Param_npTSM) +export(Param_spCATE) +export(Param_spOR) +export(Param_spRR) +export(Param_stratified) +export(Param_survival) +export(Targeted_Likelihood) +export(all_ancestors) +export(bound) +export(define_lf) +export(define_node) +export(define_param) +export(delta_param_ATE) +export(delta_param_OR) +export(delta_param_PAF) +export(delta_param_PAR) +export(delta_param_RR) +export(density_formula) +export(discretize_variable) +export(fit_tmle3) +export(generate_loss_function_from_family) +export(generate_submodel_from_family) +export(get_propensity_scores) +export(get_submodel_spec) +export(loss_function_least_squares) +export(loss_function_loglik) +export(loss_function_loglik_binomial) +export(loss_function_poisson) +export(make_CF_Likelihood) +export(make_Likelihood) +export(make_submodel_spec) +export(make_tmle3_Task) +export(plot_vim) +export(point_tx_likelihood) +export(point_tx_npsem) +export(point_tx_task) +export(process_missing) +export(propensity_score_plot) +export(propensity_score_table) +export(submodel_exp) +export(submodel_linear) +export(submodel_logistic_switch) +export(submodel_spec_logistic_switch) +export(summary_from_estimates) +export(survival_tx_likelihood) +export(survival_tx_npsem) +export(survival_tx_task) +export(time_ordering) +export(tmle3) +export(tmle3_Fit) +export(tmle3_Node) +export(tmle3_Spec) +export(tmle3_Spec_ATC) +export(tmle3_Spec_ATE) +export(tmle3_Spec_ATT) +export(tmle3_Spec_MSM) +export(tmle3_Spec_OR) +export(tmle3_Spec_PAR) +export(tmle3_Spec_RR) +export(tmle3_Spec_TSM_all) +export(tmle3_Spec_npCausalGLM) +export(tmle3_Spec_spCausalGLM) +export(tmle3_Spec_stratified) +export(tmle3_Spec_survival) +export(tmle3_Task) +export(tmle3_Update) +export(tmle3_Update_survival) +export(tmle3_vim) +export(tmle_ATC) +export(tmle_ATE) +export(tmle_ATT) +export(tmle_MSM) +export(tmle_OR) +export(tmle_PAR) +export(tmle_RR) +export(tmle_TSM_all) +export(tmle_stratified) +export(tmle_survival) +export(train_lf) import(data.table) import(ggplot2) importFrom(R6,R6Class) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index e0c3bf82..632d410e 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -52,7 +52,7 @@ Param_npCATE <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c( "binomial", "gaussian", "poisson"), outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") @@ -61,7 +61,7 @@ Param_npCATE <- R6Class( private$.targeted <- rep(T, ncol(V)) family_fluctuation <- match.arg(family_fluctuation) - private$.submodel <- list(Y=family_fluctuation) + private$.submodel <- list(Y = family_fluctuation) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -120,7 +120,7 @@ Param_npCATE <- R6Class( scaleinv <- solve(scale) EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) EIF_WA <- apply(V, 2, function(v) { - self$weights * (v * (Q1 - Q0 - CATE) - mean(v*self$weights * (Q1 - Q0 - CATE))) + self$weights * (v * (Q1 - Q0 - CATE) - mean(v * self$weights * (Q1 - Q0 - CATE))) }) %*% scaleinv # print(dim(EIF_Y)) diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index 5fd59d64..9735c764 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -49,7 +49,7 @@ Param_npCATT <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATT = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c( "binomial", "gaussian", "poisson"), outcome_node = "Y") { + initialize = function(observed_likelihood, formula_CATT = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") @@ -58,7 +58,7 @@ Param_npCATT <- R6Class( private$.targeted <- rep(T, ncol(V)) family_fluctuation <- match.arg(family_fluctuation) - private$.submodel <- list(Y=family_fluctuation) + private$.submodel <- list(Y = family_fluctuation) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists diff --git a/R/Param_npRR.R b/R/Param_npRR.R index ecc80dfc..73c2556a 100644 --- a/R/Param_npRR.R +++ b/R/Param_npRR.R @@ -52,7 +52,7 @@ Param_npRR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_RR = ~1, intervention_list_treatment, intervention_list_control, binary_outcome = FALSE, family_fluctuation = c("poisson", "binomial"),outcome_node = "Y") { + initialize = function(observed_likelihood, formula_RR = ~1, intervention_list_treatment, intervention_list_control, binary_outcome = FALSE, family_fluctuation = c("poisson", "binomial"), outcome_node = "Y") { super$initialize(observed_likelihood, list(), outcome_node) family_fluctuation <- match.arg(family_fluctuation) training_task <- self$observed_likelihood$training_task @@ -61,7 +61,7 @@ Param_npRR <- R6Class( private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) private$.binary_outcome <- binary_outcome - private$.submodel <- list(Y=family_fluctuation) + private$.submodel <- list(Y = family_fluctuation) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { @@ -124,9 +124,8 @@ Param_npRR <- R6Class( scaleinv <- solve(scale) EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) EIF_WA <- apply(V, 2, function(v) { - self$weights * (v * (RR*Q0 - Q1) - mean(self$weights * v*(RR*Q0 - Q1))) + self$weights * (v * (RR * Q0 - Q1) - mean(self$weights * v * (RR * Q0 - Q1))) }) %*% scaleinv - } diff --git a/R/Param_npTSM.R b/R/Param_npTSM.R index b8e8cf84..04f45e84 100644 --- a/R/Param_npTSM.R +++ b/R/Param_npTSM.R @@ -52,9 +52,9 @@ Param_npTSM <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_TSM = ~1, intervention_list, family_fluctuation = c( "binomial", "gaussian", "poisson"), outcome_node = "Y") { + initialize = function(observed_likelihood, formula_TSM = ~1, intervention_list, family_fluctuation = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { family_fluctuation <- match.arg(family_fluctuation) - private$.submodel <- list(Y=family_fluctuation) + private$.submodel <- list(Y = family_fluctuation) super$initialize(observed_likelihood, list(), outcome_node) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") @@ -119,7 +119,7 @@ Param_npTSM <- R6Class( EIF_Y <- self$weights * (H %*% scaleinv) * as.vector(Y - Q) EIF_WA <- apply(V, 2, function(v) { - self$weights * (v * (Q1 - Q1beta) - mean(self$weights * v* (Q1 - Q1beta))) + self$weights * (v * (Q1 - Q1beta) - mean(self$weights * v * (Q1 - Q1beta))) }) %*% scaleinv } diff --git a/R/submodels.R b/R/submodels.R index d5bf7f69..27e9b363 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -38,8 +38,8 @@ submodel_logistic_switch <- function(eps, offset, X, observed) { #' @param v ... #' @export loss_function_loglik_binomial <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { - #loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) - loss <- -1 * (observed * log(estimate) + (1-observed) * log(1-estimate)) + # loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) + loss <- -1 * (observed * log(estimate) + (1 - observed) * log(1 - estimate)) if (!is.null(weights)) { loss <- weights * loss } diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index ca686144..271bed92 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -13,7 +13,7 @@ tmle3_Spec_npCausalGLM <- R6Class( public = list( initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR", "RR"), treatment_level = 1, control_level = 0, family_fluctuation = NULL, likelihood_override = NULL, - variable_types = NULL, delta_epsilon = 0.025, ...) { + variable_types = NULL, delta_epsilon = 0.025, ...) { estimand <- match.arg(estimand) private$.options <- list( estimand = estimand, formula = formula, family_fluctuation = family_fluctuation, @@ -29,12 +29,11 @@ tmle3_Spec_npCausalGLM <- R6Class( Y <- data[[node_list$Y]] family <- self$options$family_fluctuation - if(is.null(family) && self$options$estimand %in% c("CATE", "CATT", "TSM")) { - - if(all(Y %in% c( 0,1))) { + if (is.null(family) && self$options$estimand %in% c("CATE", "CATT", "TSM")) { + if (all(Y %in% c(0, 1))) { family <- "binomial" scale_outcome <- FALSE - } else if (all(Y >=0)) { + } else if (all(Y >= 0)) { family <- "poisson" scale_outcome <- FALSE } else { @@ -42,31 +41,30 @@ tmle3_Spec_npCausalGLM <- R6Class( scale_outcome <- FALSE } } else if (is.null(family) && self$options$estimand == "RR") { - - if(all(Y %in% c( 0,1))) { + if (all(Y %in% c(0, 1))) { family <- "binomial" } else { family <- "poisson" scale_outcome <- FALSE } } else if (!is.null(family)) { - if(family == "binomial") { + if (family == "binomial") { scale_outcome <- TRUE - } else{ + } else { scale_outcome <- FALSE } } private$.options$family_fluctuation <- family - binary_outcome <- all(data[[node_list$Y]] %in% c(0,1)) + binary_outcome <- all(data[[node_list$Y]] %in% c(0, 1)) private$.options$binary_outcome <- binary_outcome if (self$options$estimand == "RR") { - if(binary_outcome) { + if (binary_outcome) { type <- "binomial" } else { type <- "continuous" } - variable_types <- list(Y = variable_type(type )) - #scale_outcome <- binary_outcome + variable_types <- list(Y = variable_type(type)) + # scale_outcome <- binary_outcome } else if (self$options$estimand == "OR") { variable_types <- list(Y = variable_type("binomial")) } @@ -97,7 +95,7 @@ tmle3_Spec_npCausalGLM <- R6Class( } else if (self$options$estimand == "OR") { updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = delta_epsilon, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR") { - if(self$options$family_fluctuation == "poisson") { + if (self$options$family_fluctuation == "poisson") { bounds <- list(Y = c(0.0025, Inf), A = 0.005) } else { bounds <- list(Y = 0.0025, A = 0.005) diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 849bc5bc..0234738e 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -146,7 +146,7 @@ tmle3_Update <- R6Class( ED <- colMeans(EIF_components) - EDnormed <- ED / norm(ED, type = "2") * sqrt(length(ED)) #Ensures step size generalizes to many parameters better + EDnormed <- ED / norm(ED, type = "2") * sqrt(length(ED)) # Ensures step size generalizes to many parameters better if (length(EIF_components) == 0 || ncol(EIF_components) != ncol(covariates_dt)) { stop("Not all params provide EIF components") } @@ -155,7 +155,7 @@ tmle3_Update <- R6Class( ) if (is.null(EIF_components)) { ED <- ED_from_estimates(self$current_estimates) - EDnormed <- ED / norm(ED, type = "2") * sqrt(length(ED)) + EDnormed <- ED / norm(ED, type = "2") * sqrt(length(ED)) } # covariates_dt <- self$collapse_covariates(self$current_estimates, covariates_dt) } else { @@ -257,9 +257,8 @@ tmle3_Update <- R6Class( method = "Brent" ) epsilon <- optim_fit$par - #Qnew <- self$apply_submodel(submodel, submodel_data, epsilon) - #print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) - + # Qnew <- self$apply_submodel(submodel, submodel_data, epsilon) + # print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) } else { epsilon <- self$delta_epsilon } @@ -278,7 +277,6 @@ tmle3_Update <- R6Class( } } else { if (self$fluctuation_type == "standard") { - suppressWarnings({ submodel_fit <- glm(observed ~ H - 1, submodel_data, offset = family_object$linkfun(submodel_data$initial), @@ -287,11 +285,11 @@ tmle3_Update <- R6Class( start = rep(0, ncol(submodel_data$H)) ) }) - #Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit)) - #print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) + # Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit)) + # print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) - #Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit) ) - } else if (self$fluctuation_type == "weighted") { + # Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit) ) + } else if (self$fluctuation_type == "weighted") { if (self$one_dimensional) { suppressWarnings({ submodel_fit <- glm(observed ~ 1, submodel_data, From 48324de5f233d221c8beba9a89e71ea005098841 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 14:21:01 -0700 Subject: [PATCH 54/65] small --- R/Param_npRR.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Param_npRR.R b/R/Param_npRR.R index 73c2556a..00ba3e1b 100644 --- a/R/Param_npRR.R +++ b/R/Param_npRR.R @@ -158,7 +158,7 @@ Param_npRR <- R6Class( IC <- as.matrix(EIF) - result <- list(psi = beta, IC = IC, RR = RR) + result <- list(psi = beta, IC = IC, RR = RR, transform = exp) return(result) } ), From ab16187d11a796f440e2ccb1131d402cfce5baa8 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 18:36:30 -0700 Subject: [PATCH 55/65] added Param_coxph --- NAMESPACE | 2 + R/Param_coxph.R | 245 ++++++++++++++++++++++++++++++++++++++++ R/tmle3_Spec_coxph.R | 100 ++++++++++++++++ R/tmle3_Task.R | 8 +- R/tmle3_Update.R | 15 ++- R/utils.R | 2 +- man/Param_ATC.Rd | 1 + man/Param_ATE.Rd | 1 + man/Param_ATT.Rd | 1 + man/Param_MSM.Rd | 1 + man/Param_TSM.Rd | 1 + man/Param_base.Rd | 1 + man/Param_coxph.Rd | 77 +++++++++++++ man/Param_delta.Rd | 1 + man/Param_mean.Rd | 1 + man/Param_npCATE.Rd | 1 + man/Param_npCATT.Rd | 1 + man/Param_npOR.Rd | 1 + man/Param_npRR.Rd | 1 + man/Param_npTSM.Rd | 1 + man/Param_spCATE.Rd | 1 + man/Param_spOR.Rd | 1 + man/Param_spRR.Rd | 1 + man/Param_stratified.Rd | 1 + man/Param_survival.Rd | 1 + man/define_param.Rd | 1 + man/tmle3_Fit.Rd | 1 + man/tmle3_Spec_coxph.Rd | 8 ++ vignettes/testing.Rmd | 57 ++++++++++ 29 files changed, 525 insertions(+), 9 deletions(-) create mode 100644 R/Param_coxph.R create mode 100644 R/tmle3_Spec_coxph.R create mode 100644 man/Param_coxph.Rd create mode 100644 man/tmle3_Spec_coxph.Rd diff --git a/NAMESPACE b/NAMESPACE index d512c9b3..be0ae6d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(Param_ATT) export(Param_MSM) export(Param_TSM) export(Param_base) +export(Param_coxph) export(Param_delta) export(Param_mean) export(Param_npCATE) @@ -84,6 +85,7 @@ export(tmle3_Spec_OR) export(tmle3_Spec_PAR) export(tmle3_Spec_RR) export(tmle3_Spec_TSM_all) +export(tmle3_Spec_coxph) export(tmle3_Spec_npCausalGLM) export(tmle3_Spec_spCausalGLM) export(tmle3_Spec_stratified) diff --git a/R/Param_coxph.R b/R/Param_coxph.R new file mode 100644 index 00000000..a2b0ec60 --- /dev/null +++ b/R/Param_coxph.R @@ -0,0 +1,245 @@ +#' Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +#' The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +#' Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +#' This can be used to assess heterogeneity of the average treatment effect. +#' We note that `formula_coxph = ~ 1` gives an estimator of the nonparametric average treatment effect (ATE). +#' +#' Parameter definition for the Average Treatment Effect (ATE). +#' @importFrom R6 R6Class +#' @importFrom uuid UUIDgenerate +#' @importFrom methods is +#' @family Parameters +#' @keywords data +#' +#' @return \code{Param_base} object +#' +#' @format \code{\link{R6Class}} object. +#' +#' @section Constructor: +#' \code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} +#' +#' \describe{ +#' \item{\code{observed_likelihood}}{A \code{\link{Likelihood}} cocoxphesponding to the observed likelihood +#' } +#' \item{\code{formula_coxph}}{... +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +#' } +#' \item{\code{...}}{Not cucoxphently used. +#' } +#' \item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +#' } +#' } +#' + +#' @section Fields: +#' \describe{ +#' \item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +#' } +#' \item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +#' } +#' \item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +#' } +#' \item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +#' } +#' } +#' @export +Param_coxph <- R6Class( + classname = "Param_coxph", + portable = TRUE, + class = TRUE, + inherit = Param_base, + public = list( + initialize = function(observed_likelihood, formula_coxph = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial"), outcome_node = "N") { + print(outcome_node) + super$initialize(observed_likelihood, list(), outcome_node = outcome_node) + family_fluctuation <- match.arg(family_fluctuation) + training_task <- self$observed_likelihood$training_task + W <- training_task$get_regression_task("W", is_time_variant = TRUE)$Y + print(W) + print(formula_coxph) + V <- model.matrix(formula_coxph, as.data.frame(W)) + private$.formula_names <- colnames(V) + private$.targeted <- rep(T, ncol(V)) + private$.submodel <- list(N = family_fluctuation) + + + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { + # add delta_Y=0 to intervention lists + outcome_censoring_node <- observed_likelihood$censoring_nodes[[outcome_node]] + censoring_intervention <- define_lf(LF_static, outcome_censoring_node, value = 1) + intervention_list_treatment <- c(intervention_list_treatment, censoring_intervention) + intervention_list_control <- c(intervention_list_control, censoring_intervention) + } + private$.formula_coxph <- formula_coxph + private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) + private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) + }, + long_to_mat = function(x, id, time) { + dt <- data.table(id = id, time = time, x = as.vector(x)) + wide <- dcast(dt, id ~ time, value.var = "x") + mat <- as.matrix(wide[, -1, with = FALSE]) + return(mat) + }, + hm_to_sm = function(hm) { + # TODO: check + sm <- t(apply(1 - hm, 1, cumprod)) + # sm <- cbind(1,sm[,-ncol(sm)]) + return(sm) + }, + clever_covariates = function(tmle_task = NULL, fold_number = "full", is_training_task = TRUE) { + training_task <- self$observed_likelihood$training_task + if (is.null(tmle_task)) { + tmle_task <- training_task + } + + + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + intervention_nodes <- union(names(self$intervention_list_treatment), names(self$intervention_list_control)) + + W <- tmle_task$get_regression_task("W", is_time_variant = TRUE)$Y + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + dNt <- tmle_task$get_tmle_node("N", format = F) + dCt <- tmle_task$get_tmle_node("A_c", format = F) + prefailure <- as.numeric(tmle_task$get_tmle_node("pre_failure")) + Vt <- model.matrix(self$formula_coxph, as.data.frame(W)) + + g <- self$observed_likelihood$get_likelihoods(tmle_task, "A", fold_number) + g1 <- ifelse(A == 1, g, 1 - g) + g0 <- 1 - g1 + + pN <- self$observed_likelihood$get_likelihoods(tmle_task, "N", fold_number) + pC <- self$observed_likelihood$get_likelihoods(tmle_task, "A_c", fold_number) + pN0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "N", fold_number)) + pN1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "N", fold_number) + + time <- tmle_task$time + id <- tmle_task$id + long_order <- order(id, time) + + pC_mat <- self$long_to_mat(pC, id, time) + S_censor_mat <- self$hm_to_sm(pC_mat) + S_censor_mat <- cbind(1, S_censor_mat[, -ncol(S_censor_mat)]) + S_censor <- as.vector(S_censor_mat) # Back to long, CHECK + + beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = prefailure * self$weights / S_censor))) + HR <- as.vector(exp(Vt %*% beta)) + + t_grid <- sort(unique(time)) + + + H <- as.matrix(Vt * (prefailure / S_censor) * (A / g1 * HR - (1 - A) / g0)) + + + EIF_N <- NULL + # Store EIF component + if (is_training_task) { + scale <- apply(Vt, 2, function(v) { + apply(self$weights * Vt * (v) * HR * pN0, 2, sum) / length(unique(id)) + }) + + + scaleinv <- solve(scale) + EIF_N <- self$weights * (H %*% scaleinv) * as.vector(dNt - pN) + EIF_WA <- apply(Vt, 2, function(v) { + long_vec <- self$weights * (v * (HR * pN0 - pN1)) + wide_vec <- self$long_to_mat(long_vec, id, time) + means <- colMeans(wide_vec) + as.vector(t(t(wide_vec) - means)) + }) %*% scaleinv + } + + + + + + return(list(N = H, EIF = list(N = EIF_N, WA = EIF_WA))) + }, + estimates = function(tmle_task = NULL, fold_number = "full") { + if (is.null(tmle_task)) { + tmle_task <- self$observed_likelihood$training_task + } + cf_task1 <- self$cf_likelihood_treatment$enumerate_cf_tasks(tmle_task)[[1]] + cf_task0 <- self$cf_likelihood_control$enumerate_cf_tasks(tmle_task)[[1]] + + W <- tmle_task$get_regression_task("W", is_time_variant = TRUE)$Y + A <- tmle_task$get_tmle_node("A", format = T)[[1]] + dNt <- tmle_task$get_tmle_node("N", format = F) + dCt <- tmle_task$get_tmle_node("A_c", format = F) + prefailure <- as.numeric(tmle_task$get_tmle_node("pre_failure")) + Vt <- model.matrix(self$formula_coxph, as.data.frame(W)) + + weights <- tmle_task$weights + time <- tmle_task$time + id <- tmle_task$id + long_order <- order(id, time) + # clever_covariates happen here (for this param) only, but this is repeated computation + EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIF <- EIF$N + EIF$WA + + EIF <- apply(EIF, 2, function(col) { + rowSums(self$long_to_mat(col, id, time)) + }) + + pN <- self$observed_likelihood$get_likelihoods(tmle_task, "N", fold_number) + pC <- self$observed_likelihood$get_likelihoods(tmle_task, "A_c", fold_number) + pN0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "N", fold_number) + pN1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "N", fold_number) + + + + pC_mat <- self$long_to_mat(pC, id, time) + S_censor_mat <- self$hm_to_sm(pC_mat) + S_censor_mat <- cbind(1, S_censor_mat[, -ncol(S_censor_mat)]) + S_censor <- as.vector(S_censor_mat) # Back to long, CHECK + + + + beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = prefailure * self$weights / S_censor))) + + + HR <- exp(Vt %*% beta) + + IC <- as.matrix(EIF) + print(colMeans(IC)) + result <- list(psi = beta, IC = IC, HR = HR, transform = exp) + return(result) + } + ), + active = list( + name = function() { + param_form <- private$.formula_names # sprintf("coxph[%s_{%s}-%s_{%s}]", self$outcome_node, self$cf_likelihood_treatment$name, self$outcome_node, self$cf_likelihood_control$name) + return(param_form) + }, + cf_likelihood_treatment = function() { + return(private$.cf_likelihood_treatment) + }, + cf_likelihood_control = function() { + return(private$.cf_likelihood_control) + }, + intervention_list_treatment = function() { + return(self$cf_likelihood_treatment$intervention_list) + }, + intervention_list_control = function() { + return(self$cf_likelihood_control$intervention_list) + }, + update_nodes = function() { + return(c(self$outcome_node)) + }, + formula_coxph = function() { + return(private$.formula_coxph) + } + ), + private = list( + .type = "coxph (HR)", + .cf_likelihood_treatment = NULL, + .cf_likelihood_control = NULL, + .supports_outcome_censoring = TRUE, + .formula_coxph = NULL, + .submodel = list(N = "binomial_logit"), + .formula_names = NULL + ) +) diff --git a/R/tmle3_Spec_coxph.R b/R/tmle3_Spec_coxph.R new file mode 100644 index 00000000..e43786a3 --- /dev/null +++ b/R/tmle3_Spec_coxph.R @@ -0,0 +1,100 @@ +#' Defines a TML Estimator (except for the data) +#' +#' +#' @importFrom R6 R6Class +#' +#' @export +# +tmle3_Spec_coxph <- R6Class( + classname = "tmle3_Spec_coxph", + portable = TRUE, + class = TRUE, + inherit = tmle3_Spec, + public = list( + initialize = function(formula = ~1, treatment_level, control_level, variable_types = NULL, delta_epsilon = 0.05, ...) { + super$initialize( + formula = formula, + treatment_level = treatment_level, + control_level = control_level, + delta_epsilon = delta_epsilon, + variable_types = variable_types, + + ... + ) + }, + + # TODO: check + transform_data = function(data, node_list) { + T_tilde_name <- node_list$T_tilde + Delta_name <- node_list$Delta + T_tilde_data <- data[T_tilde_name] + Delta_data <- data[Delta_name] + k_grid <- 1:max(T_tilde_data) + + if (is.null(node_list$id)) { + id <- 1:nrow(data) + data <- cbind(id = id, data) + node_list$id <- "id" + } + + all_times <- lapply(k_grid, function(t_current) { + df_time <- copy(data) + # TODO: check + df_time$N <- as.numeric(t_current == T_tilde_data & Delta_data == 1) + df_time$A_c <- as.numeric(t_current == T_tilde_data & Delta_data == 0) + df_time$pre_failure <- as.numeric(t_current <= T_tilde_data) + df_time$t <- t_current + return(df_time) + }) + df_long <- rbindlist(all_times) + + long_node_list <- copy(node_list) + long_node_list$time <- "t" + long_node_list$N <- "N" + long_node_list$A_c <- "A_c" + long_node_list$pre_failure <- "pre_failure" + + return(list(long_data = df_long, long_node_list = long_node_list)) + }, + + make_tmle_task = function(data, node_list, ...) { + variable_types <- self$options$variable_types + data_list <- self$transform_data(data, node_list) + tmle_task <- survival_tx_task(data_list$long_data, data_list$long_node_list, variable_types) + + return(tmle_task) + }, + + make_initial_likelihood = function(tmle_task, learner_list = NULL) { + likelihood <- survival_tx_likelihood(tmle_task, learner_list) + return(likelihood) + }, + make_updater = function(convergence_type = "sample_size", verbose = TRUE, ...) { + if (!is.null(self$options$verbose)) { + verbose <- self$options$verbose + } + + updater <- tmle3_Update$new(maxit = 100, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = self$options$delta_epsilon, constrain_step = TRUE, bounds = c(0.0025), ...) + + return(updater) + }, + + make_params = function(tmle_task, likelihood) { + treatment_value <- self$options$treatment_level + control_value <- self$options$control_level + + treatment <- define_lf(LF_static, "A", value = treatment_value) + control <- define_lf(LF_static, "A", value = control_value) + + # TODO: currently support treatment specific + # TODO: check + param_surv <- Param_coxph$new(likelihood, self$options$formula, treatment, control, + outcome_node = "N" + ) + tmle_params <- list(param_surv) + return(tmle_params) + } + ), + active = list(), + private = list() +) diff --git a/R/tmle3_Task.R b/R/tmle3_Task.R index 4dbd57e7..88c6c7d7 100644 --- a/R/tmle3_Task.R +++ b/R/tmle3_Task.R @@ -200,7 +200,7 @@ tmle3_Task <- R6Class( nodes$covariates <- covariates - regression_data <- do.call(cbind, c(all_covariate_data, outcome_data, node_data)) + regression_data <- as.data.table(do.call(cbind, c(all_covariate_data, outcome_data, node_data))) if ((is_time_variant) && (!is.null(self$nodes$time))) { regression_data$time <- self$time @@ -216,8 +216,8 @@ tmle3_Task <- R6Class( } else { censoring <- rep(FALSE, nrow(regression_data)) } - - if (drop_censored) { + # TODO: multiviarate outcomes break in the else statement. + if (drop_censored || length(outcome) > 1) { indices <- intersect(indices, which(!censoring)) } else { # impute arbitrary value for node Need to keep the data shape the same, @@ -243,8 +243,6 @@ tmle3_Task <- R6Class( - - suppressWarnings({ regression_task <- sl3_Task$new( regression_data, diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 0234738e..a14f15f6 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -171,8 +171,10 @@ tmle3_Update <- R6Class( # scale observed and predicted values for bounded continuous observed <- tmle_task$scale(observed, update_node) initial <- tmle_task$scale(initial, update_node) - weights <- tmle_task$get_regression_task(update_node)$weights - + weights <- tmle_task$get_regression_task(update_node, is_time_variant = likelihood$factor_list[[update_node]]$is_time_variant)$weights + if (length(weights) != length(initial) || any(is.na(weights))) { + stop("Weights do not match length or are missing values.") + } # protect against qlogis(1)=Inf @@ -213,6 +215,7 @@ tmle3_Update <- R6Class( return(submodel_data) }, fit_submodel = function(submodel_data) { + # Extract submodel spec info EDnormed <- submodel_data$EDnormed @@ -257,7 +260,8 @@ tmle3_Update <- R6Class( method = "Brent" ) epsilon <- optim_fit$par - # Qnew <- self$apply_submodel(submodel, submodel_data, epsilon) + Qnew <- self$apply_submodel(submodel, submodel_data, epsilon) + # print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) } else { epsilon <- self$delta_epsilon @@ -285,9 +289,14 @@ tmle3_Update <- R6Class( start = rep(0, ncol(submodel_data$H)) ) }) + + + # Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit)) + # print(colMeans(submodel_data$H*(submodel_data$observed - Qnew))) + # Qnew <- family_object$linkinv(family_object$linkfun(submodel_data$initial) + submodel_data$H %*% coef(submodel_fit) ) } else if (self$fluctuation_type == "weighted") { if (self$one_dimensional) { diff --git a/R/utils.R b/R/utils.R index ecd1eaef..efaa9a0b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -42,7 +42,7 @@ summary_from_estimates <- function(task, estimates, param_types = NULL, IC <- lapply(estimates, `[[`, "IC") IC <- do.call(cbind, IC) # for repeated measures, average IC values to get subject-level IC values - if (length(unique(task$id)) < length(task$id)) { + if (length(unique(task$id)) < length(task$id) && nrow(IC) != length(unique(task$id))) { combined <- (by(IC, as.numeric(task$id), colMeans, simplify = FALSE)) IC <- do.call(rbind, combined) } diff --git a/man/Param_ATC.Rd b/man/Param_ATC.Rd index 086335b5..ab18aa54 100644 --- a/man/Param_ATC.Rd +++ b/man/Param_ATC.Rd @@ -61,6 +61,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_ATE.Rd b/man/Param_ATE.Rd index 19beff27..5855fb1a 100644 --- a/man/Param_ATE.Rd +++ b/man/Param_ATE.Rd @@ -51,6 +51,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_ATT.Rd b/man/Param_ATT.Rd index 45aa0e50..2e67b5b2 100644 --- a/man/Param_ATT.Rd +++ b/man/Param_ATT.Rd @@ -61,6 +61,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_MSM.Rd b/man/Param_MSM.Rd index d1995b00..02d7e030 100644 --- a/man/Param_MSM.Rd +++ b/man/Param_MSM.Rd @@ -59,6 +59,7 @@ Other Parameters: \code{\link{Param_ATT}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_TSM.Rd b/man/Param_TSM.Rd index f730ecb8..a8d3d7b0 100644 --- a/man/Param_TSM.Rd +++ b/man/Param_TSM.Rd @@ -55,6 +55,7 @@ Other Parameters: \code{\link{Param_ATT}}, \code{\link{Param_MSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_base.Rd b/man/Param_base.Rd index 9ef4ff6f..1ad7cbc5 100644 --- a/man/Param_base.Rd +++ b/man/Param_base.Rd @@ -69,6 +69,7 @@ Other Parameters: \code{\link{Param_ATT}}, \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_coxph.Rd b/man/Param_coxph.Rd new file mode 100644 index 00000000..18a7253b --- /dev/null +++ b/man/Param_coxph.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Param_coxph.R +\name{Param_coxph} +\alias{Param_coxph} +\title{Nonparametric inference for user-specified parametric working models for the conditional treatment effect. +The true conditional average treatment effect is projected onto a parametric working model using least-squares regression. +Unlike \code{Param_npCATT}, this function uses all observations to compute the projection. +This can be used to assess heterogeneity of the average treatment effect. +We note that \code{formula_coxph = ~ 1} gives an estimator of the nonparametric average treatment effect (ATE).} +\format{ +\code{\link{R6Class}} object. +} +\value{ +\code{Param_base} object +} +\description{ +Parameter definition for the Average Treatment Effect (ATE). +} +\section{Constructor}{ + +\code{define_param(Param_ATT, observed_likelihood, intervention_list, ..., outcome_node)} + +\describe{ +\item{\code{observed_likelihood}}{A \code{\link{Likelihood}} cocoxphesponding to the observed likelihood +} +\item{\code{formula_coxph}}{... +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention. +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention. +} +\item{\code{...}}{Not cucoxphently used. +} +\item{\code{outcome_node}}{character, the name of the node that should be treated as the outcome +} +} +} + +\section{Fields}{ + +\describe{ +\item{\code{cf_likelihood_treatment}}{the counterfactual likelihood for the treatment +} +\item{\code{cf_likelihood_control}}{the counterfactual likelihood for the control +} +\item{\code{intervention_list_treatment}}{A list of objects inheriting from \code{\link{LF_base}}, representing the treatment intervention +} +\item{\code{intervention_list_control}}{A list of objects inheriting from \code{\link{LF_base}}, representing the control intervention +} +} +} + +\seealso{ +Other Parameters: +\code{\link{Param_ATC}}, +\code{\link{Param_ATE}}, +\code{\link{Param_ATT}}, +\code{\link{Param_MSM}}, +\code{\link{Param_TSM}}, +\code{\link{Param_base}}, +\code{\link{Param_delta}}, +\code{\link{Param_mean}}, +\code{\link{Param_npCATE}}, +\code{\link{Param_npCATT}}, +\code{\link{Param_npOR}}, +\code{\link{Param_npRR}}, +\code{\link{Param_npTSM}}, +\code{\link{Param_spCATE}}, +\code{\link{Param_spOR}}, +\code{\link{Param_spRR}}, +\code{\link{Param_stratified}}, +\code{\link{Param_survival}}, +\code{\link{define_param}()}, +\code{\link{tmle3_Fit}} +} +\concept{Parameters} +\keyword{data} diff --git a/man/Param_delta.Rd b/man/Param_delta.Rd index 58f17aaa..415755c3 100644 --- a/man/Param_delta.Rd +++ b/man/Param_delta.Rd @@ -17,6 +17,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, diff --git a/man/Param_mean.Rd b/man/Param_mean.Rd index 96dd7646..56d9b9dc 100644 --- a/man/Param_mean.Rd +++ b/man/Param_mean.Rd @@ -45,6 +45,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_npCATE}}, \code{\link{Param_npCATT}}, diff --git a/man/Param_npCATE.Rd b/man/Param_npCATE.Rd index 7a433c9c..24cc65ea 100644 --- a/man/Param_npCATE.Rd +++ b/man/Param_npCATE.Rd @@ -58,6 +58,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATT}}, diff --git a/man/Param_npCATT.Rd b/man/Param_npCATT.Rd index fe69aca6..1b7e88a2 100644 --- a/man/Param_npCATT.Rd +++ b/man/Param_npCATT.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_npOR.Rd b/man/Param_npOR.Rd index 6647e4f9..12591f51 100644 --- a/man/Param_npOR.Rd +++ b/man/Param_npOR.Rd @@ -64,6 +64,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_npRR.Rd b/man/Param_npRR.Rd index db06f3e8..bb324af0 100644 --- a/man/Param_npRR.Rd +++ b/man/Param_npRR.Rd @@ -58,6 +58,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_npTSM.Rd b/man/Param_npTSM.Rd index fdd237ff..24ea8f25 100644 --- a/man/Param_npTSM.Rd +++ b/man/Param_npTSM.Rd @@ -58,6 +58,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_spCATE.Rd b/man/Param_spCATE.Rd index d8ac0942..5da97274 100644 --- a/man/Param_spCATE.Rd +++ b/man/Param_spCATE.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_spOR.Rd b/man/Param_spOR.Rd index d6bf15ef..ba346d95 100644 --- a/man/Param_spOR.Rd +++ b/man/Param_spOR.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_spRR.Rd b/man/Param_spRR.Rd index b2c3fa4a..f4b774c3 100644 --- a/man/Param_spRR.Rd +++ b/man/Param_spRR.Rd @@ -60,6 +60,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_stratified.Rd b/man/Param_stratified.Rd index 8016b4ae..9e234fa2 100644 --- a/man/Param_stratified.Rd +++ b/man/Param_stratified.Rd @@ -55,6 +55,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/Param_survival.Rd b/man/Param_survival.Rd index ef8d0b5f..c2eae71f 100644 --- a/man/Param_survival.Rd +++ b/man/Param_survival.Rd @@ -46,6 +46,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/define_param.Rd b/man/define_param.Rd index 0fd40f66..4454478e 100644 --- a/man/define_param.Rd +++ b/man/define_param.Rd @@ -22,6 +22,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/tmle3_Fit.Rd b/man/tmle3_Fit.Rd index 0a9e38b0..9ef5c3a3 100644 --- a/man/tmle3_Fit.Rd +++ b/man/tmle3_Fit.Rd @@ -103,6 +103,7 @@ Other Parameters: \code{\link{Param_MSM}}, \code{\link{Param_TSM}}, \code{\link{Param_base}}, +\code{\link{Param_coxph}}, \code{\link{Param_delta}}, \code{\link{Param_mean}}, \code{\link{Param_npCATE}}, diff --git a/man/tmle3_Spec_coxph.Rd b/man/tmle3_Spec_coxph.Rd new file mode 100644 index 00000000..4e4e7f0c --- /dev/null +++ b/man/tmle3_Spec_coxph.Rd @@ -0,0 +1,8 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tmle3_Spec_coxph.R +\name{tmle3_Spec_coxph} +\alias{tmle3_Spec_coxph} +\title{Defines a TML Estimator (except for the data)} +\description{ +Defines a TML Estimator (except for the data) +} diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index a9ee75e1..dba97388 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -7,9 +7,66 @@ output: html_document knitr::opts_chunk$set(echo = TRUE) ``` + +```{r} + +tmle_task$get_regression_task("W")$Y + +``` ## R Markdown +```{r} + + +vet_data <- read.csv("https://raw.githubusercontent.com/tlverse/deming2019-workshop/master/data/veteran.csv") +vet_data$trt <- vet_data$trt - 1 +vet_data$time <- ceiling(vet_data$time / 20) +node_list <- list( + W = c("celltype", "karno", "diagtime", "age", "prior"), A = "trt", T_tilde = "time", Delta = "status", + time = "t", N = "N", A_c = "A_c", id = "X", pre_failure = "pre_failure" +) + +survival_spec <- tmle3_Spec_coxph$new(formula = ~ 1 + as.numeric(karno), + treatment_level = 1, control_level = 0 +) + +learner_list <- list(A_c = Lrnr_glmnet$new(), A = Lrnr_glmnet$new(), N = Lrnr_glmnet$new()) + +tmle_task <- survival_spec$make_tmle_task(vet_data, node_list) + +initial_likelihood <- survival_spec$make_initial_likelihood(tmle_task, learner_list) + +up <- tmle3_Update$new( + one_dimensional = T, + maxit = 15, + cvtmle = FALSE, + constrain_step = T, + convergence_type = "scaled_var", + delta_epsilon = 0.05, + use_best = F, + verbose = TRUE +) + +targeted_likelihood <- Targeted_Likelihood$new(initial_likelihood, updater = up) +tmle_params <- survival_spec$make_params(tmle_task, targeted_likelihood) + + +``` + +```{R} +tmle_fit_manual <- fit_tmle3( + tmle_task, targeted_likelihood, tmle_params, + targeted_likelihood$updater +) +tmle_fit_manual +``` + +```{r} +initial_likelihood$get_likelihoods(tmle_task) + + +``` ```{r} passes <- c() passes1 <- c() From f019a0f5903950eb5091e68f156b018891f6312d Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 20:25:38 -0700 Subject: [PATCH 56/65] added Param_coxph --- R/tmle3_Spec_coxph.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/tmle3_Spec_coxph.R b/R/tmle3_Spec_coxph.R index e43786a3..c6c750c6 100644 --- a/R/tmle3_Spec_coxph.R +++ b/R/tmle3_Spec_coxph.R @@ -25,6 +25,7 @@ tmle3_Spec_coxph <- R6Class( # TODO: check transform_data = function(data, node_list) { + data <- as.data.frame(data) T_tilde_name <- node_list$T_tilde Delta_name <- node_list$Delta T_tilde_data <- data[T_tilde_name] From 1fcd721891accc8556e9213214e4663e4433c8f2 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Tue, 7 Sep 2021 21:39:07 -0700 Subject: [PATCH 57/65] added Param_coxph --- R/Param_coxph.R | 10 +++++----- vignettes/testing.Rmd | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/Param_coxph.R b/R/Param_coxph.R index a2b0ec60..22fcaa59 100644 --- a/R/Param_coxph.R +++ b/R/Param_coxph.R @@ -53,13 +53,13 @@ Param_coxph <- R6Class( inherit = Param_base, public = list( initialize = function(observed_likelihood, formula_coxph = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial"), outcome_node = "N") { - print(outcome_node) + super$initialize(observed_likelihood, list(), outcome_node = outcome_node) family_fluctuation <- match.arg(family_fluctuation) training_task <- self$observed_likelihood$training_task W <- training_task$get_regression_task("W", is_time_variant = TRUE)$Y - print(W) - print(formula_coxph) + + V <- model.matrix(formula_coxph, as.data.frame(W)) private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) @@ -125,7 +125,7 @@ Param_coxph <- R6Class( S_censor_mat <- cbind(1, S_censor_mat[, -ncol(S_censor_mat)]) S_censor <- as.vector(S_censor_mat) # Back to long, CHECK - beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = prefailure * self$weights / S_censor))) + beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = self$weights))) HR <- as.vector(exp(Vt %*% beta)) t_grid <- sort(unique(time)) @@ -198,7 +198,7 @@ Param_coxph <- R6Class( - beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = prefailure * self$weights / S_censor))) + beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = self$weights ))) HR <- exp(Vt %*% beta) diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index dba97388..cbd9af1a 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -21,11 +21,11 @@ vet_data <- read.csv("https://raw.githubusercontent.com/tlverse/deming2019-works vet_data$trt <- vet_data$trt - 1 vet_data$time <- ceiling(vet_data$time / 20) node_list <- list( - W = c("celltype", "karno", "diagtime", "age", "prior"), A = "trt", T_tilde = "time", Delta = "status", + W = c("celltype" ), A = "trt", T_tilde = "time", Delta = "status", time = "t", N = "N", A_c = "A_c", id = "X", pre_failure = "pre_failure" ) -survival_spec <- tmle3_Spec_coxph$new(formula = ~ 1 + as.numeric(karno), +survival_spec <- tmle3_Spec_coxph$new(formula = ~ 1 , treatment_level = 1, control_level = 0 ) From a6b3587c9f72ca3c551833417b22d01387d0f108 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Wed, 8 Sep 2021 15:22:10 -0700 Subject: [PATCH 58/65] change OR default submodel --- R/Param_npOR.R | 2 +- R/Param_spOR.R | 2 +- vignettes/testing.Rmd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 64195127..7212b47a 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -201,7 +201,7 @@ Param_npOR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logOR = NULL, - .submodel = list(Y = "gaussian_identity"), + .submodel = list(Y = "binomial_logit"), .formula_names = NULL ) ) diff --git a/R/Param_spOR.R b/R/Param_spOR.R index ef951b41..269972c7 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -182,7 +182,7 @@ Param_spOR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logOR = NULL, - .submodel = list(Y = "gaussian_identity"), + .submodel = list(Y = "binomial_logit"), .formula_names = NULL ) ) diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index cbd9af1a..c4bffaf7 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -29,7 +29,7 @@ survival_spec <- tmle3_Spec_coxph$new(formula = ~ 1 , treatment_level = 1, control_level = 0 ) -learner_list <- list(A_c = Lrnr_glmnet$new(), A = Lrnr_glmnet$new(), N = Lrnr_glmnet$new()) +learner_list <- list(A_c = Lrnr_xgboost$new(max_depth = 4), A = Lrnr_xgboost$new(max_depth = 4), N = Lrnr_xgboost$new(max_depth = 4)) tmle_task <- survival_spec$make_tmle_task(vet_data, node_list) From 7969124ba140b6559338a01df7f14053638468a5 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Wed, 8 Sep 2021 19:49:47 -0700 Subject: [PATCH 59/65] sort of fixed coxph --- R/Param_coxph.R | 25 +++++++--- R/helpers_survival.R | 8 ++-- R/tmle3_Spec_coxph.R | 2 +- R/tmle3_Update.R | 2 + vignettes/testing.Rmd | 107 +++++++++++++++++++++++++++++++++--------- 5 files changed, 111 insertions(+), 33 deletions(-) diff --git a/R/Param_coxph.R b/R/Param_coxph.R index 22fcaa59..b2fb6826 100644 --- a/R/Param_coxph.R +++ b/R/Param_coxph.R @@ -113,8 +113,8 @@ Param_coxph <- R6Class( pN <- self$observed_likelihood$get_likelihoods(tmle_task, "N", fold_number) pC <- self$observed_likelihood$get_likelihoods(tmle_task, "A_c", fold_number) - pN0 <- as.vector(self$cf_likelihood_treatment$get_likelihoods(cf_task0, "N", fold_number)) - pN1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "N", fold_number) + pN0 <- as.vector(self$observed_likelihood$get_likelihoods(cf_task0, "N", fold_number)) + pN1 <- self$observed_likelihood$get_likelihoods(cf_task1, "N", fold_number) time <- tmle_task$time id <- tmle_task$id @@ -123,7 +123,12 @@ Param_coxph <- R6Class( pC_mat <- self$long_to_mat(pC, id, time) S_censor_mat <- self$hm_to_sm(pC_mat) S_censor_mat <- cbind(1, S_censor_mat[, -ncol(S_censor_mat)]) - S_censor <- as.vector(S_censor_mat) # Back to long, CHECK + S_censor <- pmax(as.vector(S_censor_mat), 0.005)# Back to long, CHECK + pN_mat <- self$long_to_mat(pN, id, time) + S_surv_mat <- self$hm_to_sm(pN_mat) + S_surv_mat <- cbind(1, S_surv_mat[, -ncol(S_surv_mat)]) + + S_surv <- pmax(as.vector(S_surv_mat), 0.005) beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = self$weights))) HR <- as.vector(exp(Vt %*% beta)) @@ -131,10 +136,12 @@ Param_coxph <- R6Class( t_grid <- sort(unique(time)) - H <- as.matrix(Vt * (prefailure / S_censor) * (A / g1 * HR - (1 - A) / g0)) + H <- as.matrix(Vt * (prefailure / S_censor / S_surv) * (A / g1 * HR - (1 - A) / g0)) + print(quantile(H)) EIF_N <- NULL + # Store EIF component if (is_training_task) { scale <- apply(Vt, 2, function(v) { @@ -142,6 +149,7 @@ Param_coxph <- R6Class( }) + scaleinv <- solve(scale) EIF_N <- self$weights * (H %*% scaleinv) * as.vector(dNt - pN) EIF_WA <- apply(Vt, 2, function(v) { @@ -150,6 +158,9 @@ Param_coxph <- R6Class( means <- colMeans(wide_vec) as.vector(t(t(wide_vec) - means)) }) %*% scaleinv + + + } @@ -186,8 +197,8 @@ Param_coxph <- R6Class( pN <- self$observed_likelihood$get_likelihoods(tmle_task, "N", fold_number) pC <- self$observed_likelihood$get_likelihoods(tmle_task, "A_c", fold_number) - pN0 <- self$cf_likelihood_treatment$get_likelihoods(cf_task0, "N", fold_number) - pN1 <- self$cf_likelihood_treatment$get_likelihoods(cf_task1, "N", fold_number) + pN0 <- self$observed_likelihood$get_likelihoods(cf_task0, "N", fold_number) + pN1 <- self$observed_likelihood$get_likelihoods(cf_task1, "N", fold_number) @@ -204,7 +215,7 @@ Param_coxph <- R6Class( HR <- exp(Vt %*% beta) IC <- as.matrix(EIF) - print(colMeans(IC)) + result <- list(psi = beta, IC = IC, HR = HR, transform = exp) return(result) } diff --git a/R/helpers_survival.R b/R/helpers_survival.R index 331e620c..465fc583 100644 --- a/R/helpers_survival.R +++ b/R/helpers_survival.R @@ -11,7 +11,7 @@ #' @param ... extra arguments. #' @export #' @rdname survival_tx -survival_tx_npsem <- function(node_list, variable_types = NULL) { +survival_tx_npsem <- function(node_list, variable_types = NULL ) { # make the tmle task # define censoring (lost to followup node) @@ -23,11 +23,13 @@ survival_tx_npsem <- function(node_list, variable_types = NULL) { define_node("T_tilde", node_list$T_tilde, c("A", "W"), variable_type = variable_types$T_tilde), define_node("Delta", node_list$Delta, variable_type = variable_types$Delta), censoring, - # TODO: remove t parent, handle in get_regression define_node("N", node_list$N, c("A", "W"), variable_type = variable_types$N, censoring_node = censoring), + # TODO: remove t parent, handle in get_regression + define_node("A_c", node_list$A_c, c("A", "W"), variable_type = variable_types$A_c, censoring_node = censoring) ) + return(npsem) } @@ -36,7 +38,7 @@ survival_tx_npsem <- function(node_list, variable_types = NULL) { survival_tx_task <- function(data, node_list, variable_types = NULL, ...) { setDT(data) - npsem <- survival_tx_npsem(node_list, variable_types) + npsem <- survival_tx_npsem(node_list, variable_types ) if (!is.null(node_list$id)) { tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, time = node_list$time, ...) diff --git a/R/tmle3_Spec_coxph.R b/R/tmle3_Spec_coxph.R index c6c750c6..a43d4c8a 100644 --- a/R/tmle3_Spec_coxph.R +++ b/R/tmle3_Spec_coxph.R @@ -61,7 +61,7 @@ tmle3_Spec_coxph <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types data_list <- self$transform_data(data, node_list) - tmle_task <- survival_tx_task(data_list$long_data, data_list$long_node_list, variable_types) + tmle_task <- survival_tx_task(data_list$long_data, data_list$long_node_list, variable_types ) return(tmle_task) }, diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index a14f15f6..1dbb3f34 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -170,7 +170,9 @@ tmle3_Update <- R6Class( # scale observed and predicted values for bounded continuous observed <- tmle_task$scale(observed, update_node) + # TODO sometimes prediction bounds suprass outcome bounds which leads to error initial <- tmle_task$scale(initial, update_node) + weights <- tmle_task$get_regression_task(update_node, is_time_variant = likelihood$factor_list[[update_node]]$is_time_variant)$weights if (length(weights) != length(initial) || any(is.na(weights))) { stop("Weights do not match length or are missing values.") diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index c4bffaf7..34366d9a 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -7,7 +7,68 @@ output: html_document knitr::opts_chunk$set(echo = TRUE) ``` +```{r} +require(simcausal) +library(sl3) +``` + +```{r} +passes<-c() + for(i in 1:200){ + tmax <- 4 + print(i) +D <- DAG.empty() +D <- D + node("W1", distr = "runif", min = -1, max = 1) + + node("W2", distr = "runif", min = -1, max = 1) + + node("A", distr = "rbinom", size = 1, prob = plogis(W1 + W2 )) + + node("dNt", t = 1:tmax, EFU = TRUE , distr = "rbinom", size = 1, prob = exp(0.5*A)*0.15*plogis(W1 + W2 )) + + node("dCt", t = 1:tmax, EFU = TRUE, distr = "rbinom", size = 1, prob = 0*plogis(W1 + W2 + t)) +D <- set.DAG(D) +data <- sim(D, n = 1000) +data + +data_N <- data[, grep("[d][N].+", colnames(data))] +data_C <- data[, grep("[d][C].+", colnames(data))] + +data_surv <- as.data.frame(do.call(rbind, lapply(1:nrow(data), function(i) { + rowN <- data_N[i,] + rowC <- data_C[i,] + t <- which(rowN==1) + tc <- which(rowC==1) + if(length(tc)==0){ + tc <- tmax + } + if(length(t)==0){ + t <- tmax+2 + } + Ttilde <- min(t,tc) + Delta <- t <= tc + return(matrix(c(Ttilde,Delta), nrow=1)) +}))) +colnames(data_surv) <- c("Ttilde", "Delta") +data$Ttilde <- data_surv$Ttilde + data$Delta <- data_surv$Delta + data <- data[, -grep("[d][C].+", colnames(data))] + data <- data[, -grep("[d][N].+", colnames(data))] + data + + + doMC::registerDoMC(10) + + tmle_spec_np <- tmle3_Spec_coxph$new(formula = ~1, delta_epsilon = 0.1, verbose = T, treatment_level = 1, control_level = 0) + learner_list <- list(A = Lrnr_gam$new() , N = Lrnr_hal9001$new(smoothness_orders = 0, num_knots = 20, max_degree = 1), A_c = Lrnr_hal9001$new(smoothness_orders = 0, num_knots = 20, max_degree = 1) ) + node_list <- list( W = c("W1", "W2"), A = "A", T_tilde = "Ttilde", Delta = "Delta" ) + + tmle3_fit <- suppressMessages(suppressWarnings(tmle3(tmle_spec_np, data, node_list, learner_list))) + + print(tmle3_fit$summary) + passes <- c(passes, tmle3_fit$summary$lower <= 0.5 & tmle3_fit$summary$upper >= 0.5 ) + print(mean(passes)) +} + + +``` ```{r} tmle_task$get_regression_task("W")$Y @@ -67,12 +128,13 @@ initial_likelihood$get_likelihoods(tmle_task) ``` + ```{r} passes <- c() passes1 <- c() passes2 <- c() -#for(i in 1:1){ +for(i in 1:100){ print(i) n <- 500 @@ -87,27 +149,28 @@ node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W ) # spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") # out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) +print("CATE") spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATE") -suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) - out <- out$summary -passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) -out - -spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATT", family = "binomial") +# suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) +# out <- out$summary +# passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) +# out +print("CATT") +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATT") (out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) - out <- out$summary + out$summary passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) -out - -spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "CATE") -suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) - out <- out$summary -passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) -out -print(rowMeans(passes)) -print(rowMeans(passes1)) -print(rowMeans(passes2)) +# print("CATE") +# spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "CATE") +# suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) +# out <- out$summary +# passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) +# out +# print(rowMeans(passes)) +# print(rowMeans(passes1)) +# print(rowMeans(passes2)) +} ``` @@ -138,17 +201,17 @@ out passes <- c() passes1 <- c() -for(i in 1:1){ +for(i in 1:100){ print(i) library(sl3) -n <- 500 +n <- 750 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(0)) Y <- rbinom(n, size = 1, prob = plogis(A + W + A*W)) quantile(plogis(1 + W) * (1-plogis(1 + W)) / ( plogis( W) * (1-plogis( W)))) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_glm$new() -lrnr_A <- Lrnr_glm$new() +lrnr_Y0W <- Lrnr_earth$new() +lrnr_A <- Lrnr_earth$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W) spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "OR") From e9d587fc24c48084f8aa70360bc75e1df999c901 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Wed, 8 Sep 2021 19:50:40 -0700 Subject: [PATCH 60/65] sort of fixed coxph --- R/Param_coxph.R | 2 +- vignettes/testing.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Param_coxph.R b/R/Param_coxph.R index b2fb6826..7f88f0ee 100644 --- a/R/Param_coxph.R +++ b/R/Param_coxph.R @@ -138,7 +138,7 @@ Param_coxph <- R6Class( H <- as.matrix(Vt * (prefailure / S_censor / S_surv) * (A / g1 * HR - (1 - A) / g0)) - print(quantile(H)) + #print(quantile(H)) EIF_N <- NULL diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 34366d9a..529196f8 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -62,7 +62,7 @@ data$Ttilde <- data_surv$Ttilde tmle3_fit <- suppressMessages(suppressWarnings(tmle3(tmle_spec_np, data, node_list, learner_list))) - print(tmle3_fit$summary) + passes <- c(passes, tmle3_fit$summary$lower <= 0.5 & tmle3_fit$summary$upper >= 0.5 ) print(mean(passes)) } From 3a3bfa8ac466c276d29dad3504ecc7e08a697a74 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Wed, 8 Sep 2021 20:38:12 -0700 Subject: [PATCH 61/65] hi --- R/Param_coxph.R | 23 ++++++++++------------- R/helpers_survival.R | 4 ++-- R/tmle3_Spec_coxph.R | 2 +- vignettes/testing.Rmd | 15 ++++++++------- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/R/Param_coxph.R b/R/Param_coxph.R index 7f88f0ee..b462d77b 100644 --- a/R/Param_coxph.R +++ b/R/Param_coxph.R @@ -53,7 +53,6 @@ Param_coxph <- R6Class( inherit = Param_base, public = list( initialize = function(observed_likelihood, formula_coxph = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial"), outcome_node = "N") { - super$initialize(observed_likelihood, list(), outcome_node = outcome_node) family_fluctuation <- match.arg(family_fluctuation) training_task <- self$observed_likelihood$training_task @@ -123,7 +122,7 @@ Param_coxph <- R6Class( pC_mat <- self$long_to_mat(pC, id, time) S_censor_mat <- self$hm_to_sm(pC_mat) S_censor_mat <- cbind(1, S_censor_mat[, -ncol(S_censor_mat)]) - S_censor <- pmax(as.vector(S_censor_mat), 0.005)# Back to long, CHECK + S_censor <- pmax(as.vector(S_censor_mat), 0.005) # Back to long, CHECK pN_mat <- self$long_to_mat(pN, id, time) S_surv_mat <- self$hm_to_sm(pN_mat) S_surv_mat <- cbind(1, S_surv_mat[, -ncol(S_surv_mat)]) @@ -136,9 +135,9 @@ Param_coxph <- R6Class( t_grid <- sort(unique(time)) - H <- as.matrix(Vt * (prefailure / S_censor / S_surv) * (A / g1 * HR - (1 - A) / g0)) + H <- as.matrix(Vt * (prefailure / S_censor) * (A / g1 * HR - (1 - A) / g0)) - #print(quantile(H)) + # print(quantile(H)) EIF_N <- NULL @@ -151,23 +150,20 @@ Param_coxph <- R6Class( scaleinv <- solve(scale) - EIF_N <- self$weights * (H %*% scaleinv) * as.vector(dNt - pN) + EIF_N <- self$weights * (H) * as.vector(dNt - pN) EIF_WA <- apply(Vt, 2, function(v) { long_vec <- self$weights * (v * (HR * pN0 - pN1)) wide_vec <- self$long_to_mat(long_vec, id, time) means <- colMeans(wide_vec) as.vector(t(t(wide_vec) - means)) - }) %*% scaleinv - - - + }) } - return(list(N = H, EIF = list(N = EIF_N, WA = EIF_WA))) + return(list(N = H, EIF = list(N = EIF_N, WA = EIF_WA, scaleinv = scaleinv))) }, estimates = function(tmle_task = NULL, fold_number = "full") { if (is.null(tmle_task)) { @@ -188,12 +184,13 @@ Param_coxph <- R6Class( id <- tmle_task$id long_order <- order(id, time) # clever_covariates happen here (for this param) only, but this is repeated computation - EIF <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIFs <- self$clever_covariates(tmle_task, fold_number, is_training_task = TRUE)$EIF + EIF <- EIFs EIF <- EIF$N + EIF$WA EIF <- apply(EIF, 2, function(col) { rowSums(self$long_to_mat(col, id, time)) - }) + }) %*% EIFs$scaleinv pN <- self$observed_likelihood$get_likelihoods(tmle_task, "N", fold_number) pC <- self$observed_likelihood$get_likelihoods(tmle_task, "A_c", fold_number) @@ -209,7 +206,7 @@ Param_coxph <- R6Class( - beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = self$weights ))) + beta <- suppressWarnings(coef(glm.fit(Vt, pN1, offset = log(pN0), family = poisson(), weights = self$weights))) HR <- exp(Vt %*% beta) diff --git a/R/helpers_survival.R b/R/helpers_survival.R index 465fc583..42a71eb4 100644 --- a/R/helpers_survival.R +++ b/R/helpers_survival.R @@ -11,7 +11,7 @@ #' @param ... extra arguments. #' @export #' @rdname survival_tx -survival_tx_npsem <- function(node_list, variable_types = NULL ) { +survival_tx_npsem <- function(node_list, variable_types = NULL) { # make the tmle task # define censoring (lost to followup node) @@ -38,7 +38,7 @@ survival_tx_npsem <- function(node_list, variable_types = NULL ) { survival_tx_task <- function(data, node_list, variable_types = NULL, ...) { setDT(data) - npsem <- survival_tx_npsem(node_list, variable_types ) + npsem <- survival_tx_npsem(node_list, variable_types) if (!is.null(node_list$id)) { tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, time = node_list$time, ...) diff --git a/R/tmle3_Spec_coxph.R b/R/tmle3_Spec_coxph.R index a43d4c8a..c6c750c6 100644 --- a/R/tmle3_Spec_coxph.R +++ b/R/tmle3_Spec_coxph.R @@ -61,7 +61,7 @@ tmle3_Spec_coxph <- R6Class( make_tmle_task = function(data, node_list, ...) { variable_types <- self$options$variable_types data_list <- self$transform_data(data, node_list) - tmle_task <- survival_tx_task(data_list$long_data, data_list$long_node_list, variable_types ) + tmle_task <- survival_tx_task(data_list$long_data, data_list$long_node_list, variable_types) return(tmle_task) }, diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 529196f8..7bb28e7f 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -15,7 +15,7 @@ library(sl3) ```{r} passes<-c() for(i in 1:200){ - tmax <- 4 + tmax <- 5 print(i) D <- DAG.empty() D <- D + node("W1", distr = "runif", min = -1, max = 1) + @@ -27,20 +27,21 @@ D <- set.DAG(D) data <- sim(D, n = 1000) data -data_N <- data[, grep("[d][N].+", colnames(data))] -data_C <- data[, grep("[d][C].+", colnames(data))] +data_N <- data[, grep("[d][N].+", colnames(data)), drop = F] +data_C <- data[, grep("[d][C].+", colnames(data)), drop = F] data_surv <- as.data.frame(do.call(rbind, lapply(1:nrow(data), function(i) { rowN <- data_N[i,] rowC <- data_C[i,] t <- which(rowN==1) tc <- which(rowC==1) - if(length(tc)==0){ - tc <- tmax - } if(length(t)==0){ t <- tmax+2 } + if(length(tc)==0){ + tc <- tmax + 1 + } + Ttilde <- min(t,tc) Delta <- t <= tc return(matrix(c(Ttilde,Delta), nrow=1)) @@ -62,7 +63,7 @@ data$Ttilde <- data_surv$Ttilde tmle3_fit <- suppressMessages(suppressWarnings(tmle3(tmle_spec_np, data, node_list, learner_list))) - + print(tmle3_fit$summary) passes <- c(passes, tmle3_fit$summary$lower <= 0.5 & tmle3_fit$summary$upper >= 0.5 ) print(mean(passes)) } From d340dc95eafe066c0696613242fc219650f345ab Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Wed, 8 Sep 2021 21:33:54 -0700 Subject: [PATCH 62/65] hi --- R/Param_coxph.R | 6 +++--- vignettes/testing.Rmd | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/Param_coxph.R b/R/Param_coxph.R index b462d77b..80b9b097 100644 --- a/R/Param_coxph.R +++ b/R/Param_coxph.R @@ -150,13 +150,13 @@ Param_coxph <- R6Class( scaleinv <- solve(scale) - EIF_N <- self$weights * (H) * as.vector(dNt - pN) + EIF_N <- self$weights * (H %*% scaleinv) * as.vector(dNt - pN) EIF_WA <- apply(Vt, 2, function(v) { long_vec <- self$weights * (v * (HR * pN0 - pN1)) wide_vec <- self$long_to_mat(long_vec, id, time) means <- colMeans(wide_vec) as.vector(t(t(wide_vec) - means)) - }) + }) %*% scaleinv } @@ -190,7 +190,7 @@ Param_coxph <- R6Class( EIF <- apply(EIF, 2, function(col) { rowSums(self$long_to_mat(col, id, time)) - }) %*% EIFs$scaleinv + }) #%*% EIFs$scaleinv pN <- self$observed_likelihood$get_likelihoods(tmle_task, "N", fold_number) pC <- self$observed_likelihood$get_likelihoods(tmle_task, "A_c", fold_number) diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 7bb28e7f..250ade95 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -19,7 +19,8 @@ passes<-c() print(i) D <- DAG.empty() D <- D + node("W1", distr = "runif", min = -1, max = 1) + - node("W2", distr = "runif", min = -1, max = 1) + + node("Wa2", distr = "rbinom", size = 1, prob = 0.5) + + node("W2", distr = "rconst", const =Wa2 - 0.5 ) + node("A", distr = "rbinom", size = 1, prob = plogis(W1 + W2 )) + node("dNt", t = 1:tmax, EFU = TRUE , distr = "rbinom", size = 1, prob = exp(0.5*A)*0.15*plogis(W1 + W2 )) + node("dCt", t = 1:tmax, EFU = TRUE, distr = "rbinom", size = 1, prob = 0*plogis(W1 + W2 + t)) @@ -55,10 +56,10 @@ data$Ttilde <- data_surv$Ttilde - doMC::registerDoMC(10) + doMC::registerDoMC(16) - tmle_spec_np <- tmle3_Spec_coxph$new(formula = ~1, delta_epsilon = 0.1, verbose = T, treatment_level = 1, control_level = 0) - learner_list <- list(A = Lrnr_gam$new() , N = Lrnr_hal9001$new(smoothness_orders = 0, num_knots = 20, max_degree = 1), A_c = Lrnr_hal9001$new(smoothness_orders = 0, num_knots = 20, max_degree = 1) ) + tmle_spec_np <- tmle3_Spec_coxph$new(formula = ~1, delta_epsilon = 0.5, verbose = T, treatment_level = 1, control_level = 0) + learner_list <- list(A = Lrnr_glm$new() , N = Lrnr_glm$new(formula = ~.^2), A_c = Lrnr_glm$new(formula = ~.^2)) node_list <- list( W = c("W1", "W2"), A = "A", T_tilde = "Ttilde", Delta = "Delta" ) tmle3_fit <- suppressMessages(suppressWarnings(tmle3(tmle_spec_np, data, node_list, learner_list))) From fef36139518be0dfe3bbbcc8fb35574366988ba9 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Thu, 9 Sep 2021 16:44:10 -0700 Subject: [PATCH 63/65] plz dont break --- NAMESPACE | 9 +- R/Param_ATC.R | 7 +- R/Param_ATT.R | 5 +- R/Param_base.R | 33 ++++- R/Param_coxph.R | 3 +- R/Param_npCATE.R | 9 +- R/Param_npCATT.R | 8 +- R/Param_npOR.R | 1 - R/Param_npRR.R | 9 +- R/Param_npTSM.R | 8 +- R/Param_spCATE.R | 3 +- R/Param_spOR.R | 3 +- R/Param_spRR.R | 3 +- R/Param_survival.R | 2 +- R/submodels.R | 129 +++++++----------- R/tmle3_Spec_npCausalGLM.R | 20 +-- R/tmle3_Update.R | 16 +-- man/loss_function_least_squares.Rd | 27 ---- man/loss_function_loglik_binomial.Rd | 27 ---- man/loss_function_poisson.Rd | 27 ---- man/loss_loglik.Rd | 16 +++ man/loss_loglik_binomial.Rd | 16 +++ man/loss_poisson.Rd | 16 +++ man/submodel_exp.Rd | 18 --- man/submodel_linear.Rd | 18 --- tests/testthat/test-coxph.R | 57 ++++++++ tests/testthat/test-spnpCATECATT.R | 4 + tests/testthat/{test-spRR.R => test-spnpRR.R} | 0 vignettes/testing.Rmd | 43 +++--- 29 files changed, 259 insertions(+), 278 deletions(-) delete mode 100644 man/loss_function_least_squares.Rd delete mode 100644 man/loss_function_loglik_binomial.Rd delete mode 100644 man/loss_function_poisson.Rd create mode 100644 man/loss_loglik.Rd create mode 100644 man/loss_loglik_binomial.Rd create mode 100644 man/loss_poisson.Rd delete mode 100644 man/submodel_exp.Rd delete mode 100644 man/submodel_linear.Rd create mode 100644 tests/testthat/test-coxph.R rename tests/testthat/{test-spRR.R => test-spnpRR.R} (100%) diff --git a/NAMESPACE b/NAMESPACE index be0ae6d2..ac968cc2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -49,10 +49,9 @@ export(generate_loss_function_from_family) export(generate_submodel_from_family) export(get_propensity_scores) export(get_submodel_spec) -export(loss_function_least_squares) -export(loss_function_loglik) -export(loss_function_loglik_binomial) -export(loss_function_poisson) +export(loss_loglik) +export(loss_loglik_binomial) +export(loss_poisson) export(make_CF_Likelihood) export(make_Likelihood) export(make_submodel_spec) @@ -64,8 +63,6 @@ export(point_tx_task) export(process_missing) export(propensity_score_plot) export(propensity_score_table) -export(submodel_exp) -export(submodel_linear) export(submodel_logistic_switch) export(submodel_spec_logistic_switch) export(summary_from_estimates) diff --git a/R/Param_ATC.R b/R/Param_ATC.R index 41cc4114..49483510 100644 --- a/R/Param_ATC.R +++ b/R/Param_ATC.R @@ -61,6 +61,7 @@ Param_ATC <- R6Class( private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) private$.outcome_node <- outcome_node private$.param_att <- Param_ATT$new(observed_likelihood, intervention_list_control, intervention_list_treatment, outcome_node) + private$.submodel <- private$.param_att$submodel }, clever_covariates = function(tmle_task = NULL, fold_number = "full") { att_cc <- self$param_att$clever_covariates(tmle_task, fold_number) @@ -96,6 +97,9 @@ Param_ATC <- R6Class( }, param_att = function() { return(private$.param_att) + }, + submodel = function(){ + self$param_att$submodel } ), private = list( @@ -103,7 +107,6 @@ Param_ATC <- R6Class( .param_att = NULL, .outcome_node = NULL, .cf_likelihood_treatment = NULL, - .cf_likelihood_control = NULL, - .submodel = list("A" = "logistic_switch", "Y" = "binomial_logit") + .cf_likelihood_control = NULL ) ) diff --git a/R/Param_ATT.R b/R/Param_ATT.R index 42b79cb9..a0a8e896 100644 --- a/R/Param_ATT.R +++ b/R/Param_ATT.R @@ -55,7 +55,7 @@ Param_ATT <- R6Class( inherit = Param_base, public = list( initialize = function(observed_likelihood, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { - super$initialize(observed_likelihood, list(), outcome_node) + super$initialize(observed_likelihood, list(), outcome_node, submodel = list("A" = "logistic_switch", "Y" = "binomial_logit")) private$.cf_likelihood_treatment <- CF_Likelihood$new(observed_likelihood, intervention_list_treatment) private$.cf_likelihood_control <- CF_Likelihood$new(observed_likelihood, intervention_list_control) }, @@ -154,7 +154,6 @@ Param_ATT <- R6Class( private = list( .type = "ATT", .cf_likelihood_treatment = NULL, - .cf_likelihood_control = NULL, - .submodel = list("A" = "logistic_switch", "Y" = "binomial_logit") + .cf_likelihood_control = NULL ) ) diff --git a/R/Param_base.R b/R/Param_base.R index 1527b23f..8482ca02 100644 --- a/R/Param_base.R +++ b/R/Param_base.R @@ -20,9 +20,23 @@ Param_base <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(observed_likelihood, ..., outcome_node = "Y") { + initialize = function(observed_likelihood, ..., outcome_node = "Y", submodel = NULL) { private$.observed_likelihood <- observed_likelihood private$.outcome_node <- outcome_node + if(is.null(submodel)) { # Default submodel + submodel <- list("A" = get_submodel_spec("binomial_logit"), "Y" = get_submodel_spec("binomial_logit"), "default" = get_submodel_spec("binomial_logit")) + } else if (is.list(submodel)) { # Convert to submodel spec list + submodel_names <- names(submodel) + + submodel <- lapply(submodel, get_submodel_spec) # For each node, convert to submodel spec list. #get_submodel_spec does nothing if item is already a list + names(submodel) <- submodel_names + } else { + submodel <- list("default" = get_submodel_spec(submodel)) + } + + + private$.submodel <- submodel + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { if (!self$supports_outcome_censoring) { @@ -55,13 +69,22 @@ Param_base <- R6Class( if (!(node %in% names(private$.submodel))) { node <- "default" } - return(submodel_name %in% c(private$.submodel[[node]])) + return(submodel_name == private$.submodel[[node]]$name) }, get_submodel_spec = function(update_node) { - if (!(update_node %in% names(private$.submodel))) { + + if (!(update_node %in% names(self$submodel))) { update_node <- "default" } - return(get_submodel_spec(private$.submodel[[update_node]])) + + spec <- self$submodel[[update_node]] + if(!is.list(spec)) { + + spec <- get_submodel_spec(spec) + private$.submodel[[update_node]] <- spec + } + + return(spec) } ), active = list( @@ -97,7 +120,7 @@ Param_base <- R6Class( .outcome_node = NULL, .targeted = TRUE, .supports_outcome_censoring = FALSE, - .submodel = list("A" = "binomial_logit", "Y" = "binomial_logit", "default" = "binomial_logit") + .submodel = NULL ) ) diff --git a/R/Param_coxph.R b/R/Param_coxph.R index 80b9b097..7c50aa74 100644 --- a/R/Param_coxph.R +++ b/R/Param_coxph.R @@ -53,7 +53,7 @@ Param_coxph <- R6Class( inherit = Param_base, public = list( initialize = function(observed_likelihood, formula_coxph = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial"), outcome_node = "N") { - super$initialize(observed_likelihood, list(), outcome_node = outcome_node) + super$initialize(observed_likelihood, list(), outcome_node = outcome_node, submodel = list(N = "binomial_logit")) family_fluctuation <- match.arg(family_fluctuation) training_task <- self$observed_likelihood$training_task W <- training_task$get_regression_task("W", is_time_variant = TRUE)$Y @@ -247,7 +247,6 @@ Param_coxph <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_coxph = NULL, - .submodel = list(N = "binomial_logit"), .formula_names = NULL ) ) diff --git a/R/Param_npCATE.R b/R/Param_npCATE.R index 632d410e..e0aba4be 100644 --- a/R/Param_npCATE.R +++ b/R/Param_npCATE.R @@ -52,16 +52,16 @@ Param_npCATE <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { - super$initialize(observed_likelihood, list(), outcome_node) + initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, submodel = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { + submodel <- match.arg(submodel) + super$initialize(observed_likelihood, list(), outcome_node, submodel = submodel) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATE, as.data.frame(W)) private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) - family_fluctuation <- match.arg(family_fluctuation) - private$.submodel <- list(Y = family_fluctuation) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -198,7 +198,6 @@ Param_npCATE <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_CATE = NULL, - .submodel = list(Y = "gaussian_identity"), .formula_names = NULL ) ) diff --git a/R/Param_npCATT.R b/R/Param_npCATT.R index 9735c764..5272f02c 100644 --- a/R/Param_npCATT.R +++ b/R/Param_npCATT.R @@ -49,16 +49,15 @@ Param_npCATT <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_CATT = ~1, intervention_list_treatment, intervention_list_control, family_fluctuation = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { - super$initialize(observed_likelihood, list(), outcome_node) + initialize = function(observed_likelihood, formula_CATT = ~1, intervention_list_treatment, intervention_list_control, submodel = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { + submodel <- match.arg(submodel) + super$initialize(observed_likelihood, list(), outcome_node, submodel = submodel) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATT, as.data.frame(W)) private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) - family_fluctuation <- match.arg(family_fluctuation) - private$.submodel <- list(Y = family_fluctuation) if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { # add delta_Y=0 to intervention lists @@ -200,7 +199,6 @@ Param_npCATT <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_CATT = NULL, - .submodel = list(Y = "gaussian_identity"), .formula_names = NULL ) ) diff --git a/R/Param_npOR.R b/R/Param_npOR.R index 7212b47a..4bf430f7 100644 --- a/R/Param_npOR.R +++ b/R/Param_npOR.R @@ -201,7 +201,6 @@ Param_npOR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logOR = NULL, - .submodel = list(Y = "binomial_logit"), .formula_names = NULL ) ) diff --git a/R/Param_npRR.R b/R/Param_npRR.R index 00ba3e1b..db9eaa67 100644 --- a/R/Param_npRR.R +++ b/R/Param_npRR.R @@ -52,16 +52,16 @@ Param_npRR <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_RR = ~1, intervention_list_treatment, intervention_list_control, binary_outcome = FALSE, family_fluctuation = c("poisson", "binomial"), outcome_node = "Y") { - super$initialize(observed_likelihood, list(), outcome_node) - family_fluctuation <- match.arg(family_fluctuation) + initialize = function(observed_likelihood, formula_RR = ~1, intervention_list_treatment, intervention_list_control, binary_outcome = FALSE, submodel = c("poisson", "binomial"), outcome_node = "Y") { + submodel <- match.arg(submodel) + super$initialize(observed_likelihood, list(), outcome_node, submodel = submodel) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_RR, as.data.frame(W)) private$.formula_names <- colnames(V) private$.targeted <- rep(T, ncol(V)) private$.binary_outcome <- binary_outcome - private$.submodel <- list(Y = family_fluctuation) + if (!is.null(observed_likelihood$censoring_nodes[[outcome_node]])) { @@ -192,7 +192,6 @@ Param_npRR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_RR = NULL, - .submodel = list(Y = "binomial_logit"), .formula_names = NULL, .binary_outcome = NULL ) diff --git a/R/Param_npTSM.R b/R/Param_npTSM.R index 04f45e84..0c5f114a 100644 --- a/R/Param_npTSM.R +++ b/R/Param_npTSM.R @@ -52,10 +52,9 @@ Param_npTSM <- R6Class( class = TRUE, inherit = Param_base, public = list( - initialize = function(observed_likelihood, formula_TSM = ~1, intervention_list, family_fluctuation = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { - family_fluctuation <- match.arg(family_fluctuation) - private$.submodel <- list(Y = family_fluctuation) - super$initialize(observed_likelihood, list(), outcome_node) + initialize = function(observed_likelihood, formula_TSM = ~1, intervention_list, submodel = c("binomial", "gaussian", "poisson"), outcome_node = "Y") { + submodel <- match.arg(submodel) + super$initialize(observed_likelihood, list(), outcome_node, submodel = submodel) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_TSM, as.data.frame(W)) @@ -174,7 +173,6 @@ Param_npTSM <- R6Class( .cf_likelihood = NULL, .supports_outcome_censoring = TRUE, .formula_TSM = NULL, - .submodel = list(Y = "binomial_identity"), .formula_names = NULL ) ) diff --git a/R/Param_spCATE.R b/R/Param_spCATE.R index 746246bc..c6a07f73 100644 --- a/R/Param_spCATE.R +++ b/R/Param_spCATE.R @@ -50,7 +50,7 @@ Param_spCATE <- R6Class( inherit = Param_base, public = list( initialize = function(observed_likelihood, formula_CATE = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { - super$initialize(observed_likelihood, list(), outcome_node) + super$initialize(observed_likelihood, list(), outcome_node, submodel = list(Y = "gaussian_identity")) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_CATE, as.data.frame(W)) @@ -200,7 +200,6 @@ Param_spCATE <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_CATE = NULL, - .submodel = list(Y = "gaussian_identity"), .formula_names = NULL ) ) diff --git a/R/Param_spOR.R b/R/Param_spOR.R index 269972c7..955e41df 100644 --- a/R/Param_spOR.R +++ b/R/Param_spOR.R @@ -50,7 +50,7 @@ Param_spOR <- R6Class( inherit = Param_base, public = list( initialize = function(observed_likelihood, formula_logOR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { - super$initialize(observed_likelihood, list(), outcome_node) + super$initialize(observed_likelihood, list(), outcome_node, submodel = list(Y = "binomial_logit")) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_logOR, as.data.frame(W)) @@ -182,7 +182,6 @@ Param_spOR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logOR = NULL, - .submodel = list(Y = "binomial_logit"), .formula_names = NULL ) ) diff --git a/R/Param_spRR.R b/R/Param_spRR.R index 53b56061..31604a75 100644 --- a/R/Param_spRR.R +++ b/R/Param_spRR.R @@ -51,7 +51,7 @@ Param_spRR <- R6Class( inherit = Param_base, public = list( initialize = function(observed_likelihood, formula_logRR = ~1, intervention_list_treatment, intervention_list_control, outcome_node = "Y") { - super$initialize(observed_likelihood, list(), outcome_node) + super$initialize(observed_likelihood, list(), outcome_node, submodel = list(Y = "poisson_log")) training_task <- self$observed_likelihood$training_task W <- training_task <- self$observed_likelihood$training_task$get_tmle_node("W") V <- model.matrix(formula_logRR, as.data.frame(W)) @@ -187,7 +187,6 @@ Param_spRR <- R6Class( .cf_likelihood_control = NULL, .supports_outcome_censoring = TRUE, .formula_logRR = NULL, - .submodel = list(Y = "poisson_log"), .formula_names = NULL ) ) diff --git a/R/Param_survival.R b/R/Param_survival.R index 3a9eba9e..dd452704 100644 --- a/R/Param_survival.R +++ b/R/Param_survival.R @@ -51,7 +51,7 @@ Param_survival <- R6Class( } else { private$.targeted <- times %in% target_times } - super$initialize(observed_likelihood, ..., outcome_node = outcome_node) + super$initialize(observed_likelihood, ..., outcome_node = outcome_node, submodel = list(N = "binomial_logit")) }, long_to_mat = function(x, id, time) { dt <- data.table(id = id, time = time, x = as.vector(x)) diff --git a/R/submodels.R b/R/submodels.R index 27e9b363..f44ab213 100644 --- a/R/submodels.R +++ b/R/submodels.R @@ -1,9 +1,50 @@ + +# To port to sl3 at some point: + +#' Log likelihood loss for outcomes between 0 and 1 +#' +#' @param estimate prediction +#' @param observed observed outcome +#' @export +loss_loglik_binomial <- function(estimate, observed) { + # loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) + loss <- -1 * (observed * log(estimate) + (1 - observed) * log(1 - estimate)) + return(loss) +} +#' log likelihood loss +#' @param estimate prediction +#' @param observed observed outcome +#' @export +loss_loglik <- function(estimate, observed) { + loss <- -1 * log(estimate) + return(loss) +} + +#' Poisson/log-linear loss for nonnegative variables +#' +#' @param estimate prediction +#' @param observed observed outcome +#' @export +loss_poisson <- function(estimate, observed ) { + loss <- estimate - observed * log(estimate) + return(loss) +} + + + + + + + + #' Generate Fluctuation Submodel from \code{family} object. #' #' @param family ... #' #' @export # + + generate_submodel_from_family <- function(family) { linkfun <- family$linkfun linkinv <- family$linkinv @@ -30,82 +71,8 @@ submodel_logistic_switch <- function(eps, offset, X, observed) { output <- ifelse(observed == 1, output, 1 - output) } -#' Log likelihood loss for binary variables -#' -#' @param estimate ... -#' @param observed ... -#' @param weights ... -#' @param v ... -#' @export -loss_function_loglik_binomial <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { - # loss <- -1 * ifelse(observed == 1, log(estimate), log(1 - estimate)) - loss <- -1 * (observed * log(estimate) + (1 - observed) * log(1 - estimate)) - if (!is.null(weights)) { - loss <- weights * loss - } - return(loss) -} -#' @export -loss_function_loglik <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { - loss <- -1 * log(estimate) - if (!is.null(weights)) { - loss <- weights * loss - } - return(loss) -} - -#' Linear (gaussian) Submodel Fluctuation -#' -#' @param eps ... -#' @param X ... -#' @param offset ... -#' -#' -#' @export -# -submodel_linear <- generate_submodel_from_family(gaussian()) -#' Least-squares loss for binary variables -#' -#' @param estimate ... -#' @param observed ... -#' @param weights ... -#' @param likelihood ... -#' @export -loss_function_least_squares <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { - loss <- (observed - estimate)^2 - if (!is.null(weights)) { - loss <- weights * loss - } - return(loss) -} -#' Log-linear (Poisson) Submodel Fluctuation -#' -#' @param eps ... -#' @param X ... -#' @param offset ... -#' -#' -#' @export -# -submodel_exp <- generate_submodel_from_family(poisson()) - -#' Poisson/log-linear loss for nonnegative variables -#' -#' @param estimate ... -#' @param observed ... -#' @param weights ... -#' @param likelihood ... -#' @export -loss_function_poisson <- function(estimate, observed, weights = NULL, likelihood = NULL, tmle_task = NULL, fold_number = NULL) { - loss <- estimate - observed * log(estimate) - if (!is.null(weights)) { - loss <- weights * loss - } - return(loss) -} - #' Generate loss function loss from family object or string #' @param family ... #' @export @@ -117,11 +84,11 @@ generate_loss_function_from_family <- function(family) { stop("Unsupported family object.") } if (family == "poisson") { - return(loss_function_poisson) + return(loss_poisson) } else if (family == "gaussian") { - return(loss_function_least_squares) + return(loss_squared_error) } else if (family == "binomial") { - return(loss_function_loglik_binomial) + return(loss_loglik_binomial) } } @@ -147,6 +114,11 @@ make_submodel_spec <- function(name, family = NULL, submodel_function = NULL, lo #' @param name Either a name for a submodel spec obtainable from environment (name --> get(paste0("submodel_spec_",name))}), a family object or string, or a string of the form "family_link" (e.g. "binomial_logit"). #' @export get_submodel_spec <- function(name) { + # If list, assume it is already a spec + + if(is.list(name)){ + return(name) + } output <- NULL tryCatch( { @@ -163,6 +135,7 @@ get_submodel_spec <- function(name) { output <- make_submodel_spec(name, family) }, error = function(...) { + print(...) try({ output <<- get(paste0("submodel_spec_", name)) }) @@ -178,4 +151,4 @@ get_submodel_spec <- function(name) { #' @export submodel_spec_logistic_switch <- list(name = "logistic_switch", family = function() { stop("Does not support family-based updating. Please use optim instead.") -}, submodel_function = submodel_logistic_switch, loss_function = loss_function_loglik) +}, submodel_function = submodel_logistic_switch, loss_function = loss_loglik) diff --git a/R/tmle3_Spec_npCausalGLM.R b/R/tmle3_Spec_npCausalGLM.R index 271bed92..98b3c282 100644 --- a/R/tmle3_Spec_npCausalGLM.R +++ b/R/tmle3_Spec_npCausalGLM.R @@ -11,12 +11,12 @@ tmle3_Spec_npCausalGLM <- R6Class( portable = TRUE, class = TRUE, public = list( - initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR", "RR"), treatment_level = 1, control_level = 0, family_fluctuation = NULL, + initialize = function(formula, estimand = c("CATE", "CATT", "TSM", "OR", "RR"), treatment_level = 1, control_level = 0, submodel = NULL, likelihood_override = NULL, variable_types = NULL, delta_epsilon = 0.025, ...) { estimand <- match.arg(estimand) private$.options <- list( - estimand = estimand, formula = formula, family_fluctuation = family_fluctuation, + estimand = estimand, formula = formula, submodel = submodel, treatment_level = treatment_level, control_level = control_level, delta_epsilon = delta_epsilon, likelihood_override = likelihood_override, variable_types = variable_types, ... @@ -27,7 +27,7 @@ tmle3_Spec_npCausalGLM <- R6Class( include_variance_node <- FALSE scale_outcome <- TRUE Y <- data[[node_list$Y]] - family <- self$options$family_fluctuation + family <- self$options$submodel if (is.null(family) && self$options$estimand %in% c("CATE", "CATT", "TSM")) { if (all(Y %in% c(0, 1))) { @@ -54,7 +54,7 @@ tmle3_Spec_npCausalGLM <- R6Class( scale_outcome <- FALSE } } - private$.options$family_fluctuation <- family + private$.options$submodel <- family binary_outcome <- all(data[[node_list$Y]] %in% c(0, 1)) private$.options$binary_outcome <- binary_outcome if (self$options$estimand == "RR") { @@ -95,7 +95,7 @@ tmle3_Spec_npCausalGLM <- R6Class( } else if (self$options$estimand == "OR") { updater <- tmle3_Update$new(maxit = 200, one_dimensional = TRUE, convergence_type = convergence_type, verbose = verbose, delta_epsilon = delta_epsilon, constrain_step = TRUE, bounds = 0.0025, ...) } else if (self$options$estimand == "RR") { - if (self$options$family_fluctuation == "poisson") { + if (self$options$submodel == "poisson") { bounds <- list(Y = c(0.0025, Inf), A = 0.005) } else { bounds <- list(Y = 0.0025, A = 0.005) @@ -114,7 +114,7 @@ tmle3_Spec_npCausalGLM <- R6Class( treatment_value <- self$options$treatment_level control_value <- self$options$control_level formula <- self$options$formula - family <- self$options$family_fluctuation + family <- self$options$submodel A_levels <- tmle_task$npsem[["A"]]$variable_type$levels if (!is.null(A_levels)) { treatment_value <- factor(treatment_value, levels = A_levels) @@ -127,7 +127,7 @@ tmle3_Spec_npCausalGLM <- R6Class( # If TSM generate params for all levels param <- lapply(union(treatment_value, control_value), function(value) { treatment <- define_lf(LF_static, "A", value = value) - return(Param_npTSM$new(targeted_likelihood, formula, treatment, family_fluctuation = family)) + return(Param_npTSM$new(targeted_likelihood, formula, treatment, submodel = family)) }) return(param) } else { @@ -136,13 +136,13 @@ tmle3_Spec_npCausalGLM <- R6Class( } if (self$options$estimand == "CATE") { - param <- Param_npCATE$new(targeted_likelihood, formula, treatment, control, family_fluctuation = family) + param <- Param_npCATE$new(targeted_likelihood, formula, treatment, control, submodel = family) } else if (self$options$estimand == "CATT") { - param <- Param_npCATT$new(targeted_likelihood, formula, treatment, control, family_fluctuation = family) + param <- Param_npCATT$new(targeted_likelihood, formula, treatment, control, submodel = family) } else if (self$options$estimand == "OR") { param <- Param_npOR$new(targeted_likelihood, formula, treatment, control) } else if (self$options$estimand == "RR") { - param <- Param_npRR$new(targeted_likelihood, formula, treatment, control, binary_outcome = self$options$binary_outcome, family_fluctuation = family) + param <- Param_npRR$new(targeted_likelihood, formula, treatment, control, binary_outcome = self$options$binary_outcome, submodel = family) } return(list(param)) } diff --git a/R/tmle3_Update.R b/R/tmle3_Update.R index 1dbb3f34..bcd91baf 100644 --- a/R/tmle3_Update.R +++ b/R/tmle3_Update.R @@ -117,6 +117,7 @@ tmle3_Update <- R6Class( # USE first parameter to get submodel spec submodel_spec <- self$tmle_params[[1]]$get_submodel_spec(update_node) + submodel_name <- submodel_spec$name # Check compatibility of tmle_params with submodel lapply(self$tmle_params, function(tmle_param) { @@ -210,9 +211,9 @@ tmle3_Update <- R6Class( submodel_data$submodel_spec <- submodel_spec # To support arbitrary likelihood-dependent risk functions for updating. # Is carrying this stuff around a problem computationally? - submodel_data$tmle_task <- tmle_task - submodel_data$likelihood <- likelihood - submodel_data$fold_number <- fold_number + # submodel_data$tmle_task <- tmle_task + # submodel_data$likelihood <- likelihood + # submodel_data$fold_number <- fold_number return(submodel_data) }, @@ -232,9 +233,7 @@ tmle3_Update <- R6Class( family_object <- submodel_spec$family loss_function <- submodel_spec$loss_function submodel <- submodel_spec$submodel_function - training_likelihood <- submodel_data$likelihood - training_task <- submodel_data$tmle_task - training_fold <- submodel_data$fold_number + # Subset to only numericals needed for fitting. submodel_data <- submodel_data[c("observed", "H", "initial", "weights")] @@ -247,11 +246,12 @@ tmle3_Update <- R6Class( ) } + weights <- submodel_data$weights risk <- function(epsilon) { submodel_estimate <- self$apply_submodel(submodel, submodel_data, epsilon) - loss <- loss_function(submodel_estimate, submodel_data$observed, weights = submodel_data$weights, likelihood = training_likelihood, tmle_task = training_task, fold_number = training_fold) - mean(loss) + loss <- loss_function(submodel_estimate, submodel_data$observed) + weighted.mean(loss, weights) } diff --git a/man/loss_function_least_squares.Rd b/man/loss_function_least_squares.Rd deleted file mode 100644 index c4f5e94e..00000000 --- a/man/loss_function_least_squares.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/submodels.R -\name{loss_function_least_squares} -\alias{loss_function_least_squares} -\title{Least-squares loss for binary variables} -\usage{ -loss_function_least_squares( - estimate, - observed, - weights = NULL, - likelihood = NULL, - tmle_task = NULL, - fold_number = NULL -) -} -\arguments{ -\item{estimate}{...} - -\item{observed}{...} - -\item{weights}{...} - -\item{likelihood}{...} -} -\description{ -Least-squares loss for binary variables -} diff --git a/man/loss_function_loglik_binomial.Rd b/man/loss_function_loglik_binomial.Rd deleted file mode 100644 index b55c2b2b..00000000 --- a/man/loss_function_loglik_binomial.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/submodels.R -\name{loss_function_loglik_binomial} -\alias{loss_function_loglik_binomial} -\title{Log likelihood loss for binary variables} -\usage{ -loss_function_loglik_binomial( - estimate, - observed, - weights = NULL, - likelihood = NULL, - tmle_task = NULL, - fold_number = NULL -) -} -\arguments{ -\item{estimate}{...} - -\item{observed}{...} - -\item{weights}{...} - -\item{v}{...} -} -\description{ -Log likelihood loss for binary variables -} diff --git a/man/loss_function_poisson.Rd b/man/loss_function_poisson.Rd deleted file mode 100644 index 9f95bf5a..00000000 --- a/man/loss_function_poisson.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/submodels.R -\name{loss_function_poisson} -\alias{loss_function_poisson} -\title{Poisson/log-linear loss for nonnegative variables} -\usage{ -loss_function_poisson( - estimate, - observed, - weights = NULL, - likelihood = NULL, - tmle_task = NULL, - fold_number = NULL -) -} -\arguments{ -\item{estimate}{...} - -\item{observed}{...} - -\item{weights}{...} - -\item{likelihood}{...} -} -\description{ -Poisson/log-linear loss for nonnegative variables -} diff --git a/man/loss_loglik.Rd b/man/loss_loglik.Rd new file mode 100644 index 00000000..94851aa3 --- /dev/null +++ b/man/loss_loglik.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{loss_loglik} +\alias{loss_loglik} +\title{log likelihood loss} +\usage{ +loss_loglik(estimate, observed) +} +\arguments{ +\item{estimate}{prediction} + +\item{observed}{observed outcome} +} +\description{ +log likelihood loss +} diff --git a/man/loss_loglik_binomial.Rd b/man/loss_loglik_binomial.Rd new file mode 100644 index 00000000..e20aabb2 --- /dev/null +++ b/man/loss_loglik_binomial.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{loss_loglik_binomial} +\alias{loss_loglik_binomial} +\title{Log likelihood loss for outcomes between 0 and 1} +\usage{ +loss_loglik_binomial(estimate, observed) +} +\arguments{ +\item{estimate}{prediction} + +\item{observed}{observed outcome} +} +\description{ +Log likelihood loss for outcomes between 0 and 1 +} diff --git a/man/loss_poisson.Rd b/man/loss_poisson.Rd new file mode 100644 index 00000000..55ca7f1a --- /dev/null +++ b/man/loss_poisson.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/submodels.R +\name{loss_poisson} +\alias{loss_poisson} +\title{Poisson/log-linear loss for nonnegative variables} +\usage{ +loss_poisson(estimate, observed) +} +\arguments{ +\item{estimate}{prediction} + +\item{observed}{observed outcome} +} +\description{ +Poisson/log-linear loss for nonnegative variables +} diff --git a/man/submodel_exp.Rd b/man/submodel_exp.Rd deleted file mode 100644 index 9aa79177..00000000 --- a/man/submodel_exp.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/submodels.R -\name{submodel_exp} -\alias{submodel_exp} -\title{Log-linear (Poisson) Submodel Fluctuation} -\usage{ -submodel_exp(eps, offset, X, observed) -} -\arguments{ -\item{eps}{...} - -\item{offset}{...} - -\item{X}{...} -} -\description{ -Log-linear (Poisson) Submodel Fluctuation -} diff --git a/man/submodel_linear.Rd b/man/submodel_linear.Rd deleted file mode 100644 index f6e2f8ca..00000000 --- a/man/submodel_linear.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/submodels.R -\name{submodel_linear} -\alias{submodel_linear} -\title{Linear (gaussian) Submodel Fluctuation} -\usage{ -submodel_linear(eps, offset, X, observed) -} -\arguments{ -\item{eps}{...} - -\item{offset}{...} - -\item{X}{...} -} -\description{ -Linear (gaussian) Submodel Fluctuation -} diff --git a/tests/testthat/test-coxph.R b/tests/testthat/test-coxph.R new file mode 100644 index 00000000..d7de04db --- /dev/null +++ b/tests/testthat/test-coxph.R @@ -0,0 +1,57 @@ + + + +context("Coxph does not error.") + + + +if(require(simcausal)){ +tmax <- 5 +print(i) +D <- DAG.empty() +D <- D + node("W1", distr = "runif", min = -1, max = 1) + + node("Wa2", distr = "rbinom", size = 1, prob = 0.5) + + node("W2", distr = "rconst", const =Wa2 - 0.5 ) + + node("A", distr = "rbinom", size = 1, prob = plogis(W1 + W2 )) + + node("dNt", t = 1:tmax, EFU = TRUE , distr = "rbinom", size = 1, prob = exp(0.5*A)*0.15*plogis(W1 + W2 )) + + node("dCt", t = 1:tmax, EFU = TRUE, distr = "rbinom", size = 1, prob = 0*plogis(W1 + W2 + t)) +D <- set.DAG(D) +data <- sim(D, n = 1000) +data + +data_N <- data[, grep("[d][N].+", colnames(data)), drop = F] +data_C <- data[, grep("[d][C].+", colnames(data)), drop = F] + +data_surv <- as.data.frame(do.call(rbind, lapply(1:nrow(data), function(i) { + rowN <- data_N[i,] + rowC <- data_C[i,] + t <- which(rowN==1) + tc <- which(rowC==1) + if(length(t)==0){ + t <- tmax+2 + } + if(length(tc)==0){ + tc <- tmax + 1 + } + + Ttilde <- min(t,tc) + Delta <- t <= tc + return(matrix(c(Ttilde,Delta), nrow=1)) +}))) +colnames(data_surv) <- c("Ttilde", "Delta") +data$Ttilde <- data_surv$Ttilde +data$Delta <- data_surv$Delta +data <- data[, -grep("[d][C].+", colnames(data))] +data <- data[, -grep("[d][N].+", colnames(data))] +data + + + +doMC::registerDoMC(16) + +tmle_spec_np <- tmle3_Spec_coxph$new(formula = ~1, delta_epsilon = 0.05, verbose = T, treatment_level = 1, control_level = 0) +learner_list <- list(A = Lrnr_glm$new() , N = Lrnr_glm$new(formula = ~.^2), A_c = Lrnr_glm$new(formula = ~.^2)) +node_list <- list( W = c("W1", "W2"), A = "A", T_tilde = "Ttilde", Delta = "Delta" ) + +tmle3_fit <- suppressMessages(suppressWarnings(tmle3(tmle_spec_np, data, node_list, learner_list))) +} diff --git a/tests/testthat/test-spnpCATECATT.R b/tests/testthat/test-spnpCATECATT.R index 90524925..1749600a 100644 --- a/tests/testthat/test-spnpCATECATT.R +++ b/tests/testthat/test-spnpCATECATT.R @@ -37,6 +37,10 @@ for (i in 1:1) { out <- out$summary passes2 <- c(passes2, out$lower <= 1 & out$upper >= 1) + spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1, "TSM") + suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list)) + out <- out$summary + print(mean(passes)) print(mean(passes1)) print(mean(passes2)) diff --git a/tests/testthat/test-spRR.R b/tests/testthat/test-spnpRR.R similarity index 100% rename from tests/testthat/test-spRR.R rename to tests/testthat/test-spnpRR.R diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index 250ade95..f8931742 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -139,39 +139,44 @@ passes2 <- c() for(i in 1:100){ print(i) -n <- 500 +n <- 1000 W <- runif(n, -1, 1) A <- rbinom(n, size = 1, prob = plogis(W)) Y <- rnorm(n, mean = A*W + A+W, sd = 0.3) data <- data.table(W,A,Y) -lrnr_Y0W <- Lrnr_glm$new() +lrnr_Y0W <- Lrnr_glmnet$new(formula = ~.^2) lrnr_A <- Lrnr_glm$new() node_list <- list (W = "W", A = "A", Y= "Y") learner_list <- list(A = lrnr_A, Y = lrnr_Y0W ) -# spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") -# out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1, "CATE") +out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) print("CATE") -spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATE") -# suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) -# out <- out$summary -# passes <- cbind(passes , out$lower <= 1 & out$upper >= 1) -# out +``` + +```{r} +spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "TSM", submodel = "gaussian") +suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + + out$tmle_params[[1]]$submodel print("CATT") + + + spec_spCATE <- tmle3_Spec_npCausalGLM$new(~1 + W, "CATT") (out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) - out$summary +out <- out$summary passes1 <- cbind(passes1 , out$lower <= 1 & out$upper >= 1) -# print("CATE") -# spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "CATE") -# suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) -# out <- out$summary -# passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) -# out -# print(rowMeans(passes)) -# print(rowMeans(passes1)) -# print(rowMeans(passes2)) +print("CATE") +spec_spCATE <- tmle3_Spec_spCausalGLM$new(~1 + W, "CATE") +suppressWarnings(out <- tmle3(spec_spCATE, data, node_list, learner_list = learner_list) ) + out <- out$summary +passes2 <- cbind(passes2 , out$lower <= 1 & out$upper >= 1) +out +print(rowMeans(passes)) +print(rowMeans(passes1)) +print(rowMeans(passes2)) } ``` From 0787f9b814eee0d0b7254b8674af01fe41e22666 Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Thu, 7 Apr 2022 10:17:13 -0700 Subject: [PATCH 64/65] ATE weights --- DESCRIPTION | 2 +- R/Param_ATE.R | 4 ++-- vignettes/testing.Rmd | 27 ++++++++++++++++++++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5ffcf770..3649e0ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,5 +54,5 @@ Encoding: UTF-8 LazyData: yes LazyLoad: yes VignetteBuilder: knitr -RoxygenNote: 7.1.1.9001 +RoxygenNote: 7.1.2 Roxygen: list(markdown = TRUE, r6 = FALSE) diff --git a/R/Param_ATE.R b/R/Param_ATE.R index b958dbab..8a7f4647 100644 --- a/R/Param_ATE.R +++ b/R/Param_ATE.R @@ -114,10 +114,10 @@ Param_ATE <- R6Class( EY1 <- self$observed_likelihood$get_likelihood(cf_task_treatment, self$outcome_node, fold_number) EY0 <- self$observed_likelihood$get_likelihood(cf_task_control, self$outcome_node, fold_number) - psi <- mean(EY1 - EY0) + psi <- weighted.mean(EY1 - EY0, tmle_task$weights) IC <- HA * (Y - EY) + (EY1 - EY0) - psi - + IC <- IC * weights result <- list(psi = psi, IC = IC) return(result) } diff --git a/vignettes/testing.Rmd b/vignettes/testing.Rmd index f8931742..84b65c60 100644 --- a/vignettes/testing.Rmd +++ b/vignettes/testing.Rmd @@ -8,8 +8,33 @@ knitr::opts_chunk$set(echo = TRUE) ``` ```{r} -require(simcausal) +#devtools::install_github("tlverse/tmle", ref = "general_submodels_devel") +library(data.table) library(sl3) +library(tmle3) +n <- 500 +W <- runif(n, -1, 1) +A <- rbinom(n, 1, plogis(W)) +Y <- rnorm(n, A + W + A*W) +weights <- 1/plogis(W) +data <- data.table(W = W, A = A, Y = Y, weights = weights) + +tmle3_spec <- tmle3_Spec_ATE$new(treatment_level = 1, control_level = 0) + +tmle3_fit <- tmle3(tmle3_spec, data, node_list = list( + W = c("W"), + A = "A", + Y = "Y", + weights = "weights" # specify name of weight variable found in data +), learner_list = list( + A = Lrnr_cv$new(Lrnr_xgboost$new(max_depth = 5), full_fit = TRUE), + Y =Lrnr_cv$new(Lrnr_xgboost$new(max_depth = 5), full_fit = TRUE) +)) #cross-fitted xgboost +``` + +```{r} +tmle3_fit$estimates +tmle ``` ```{r} From dd189d8370d2f029f43e714004c8d6380e57028b Mon Sep 17 00:00:00 2001 From: Lars van der Laan Date: Thu, 7 Apr 2022 10:43:34 -0700 Subject: [PATCH 65/65] ATE weights --- R/Param_ATE.R | 2 +- R/helpers_point_treatment.R | 4 ++-- R/helpers_survival.R | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/Param_ATE.R b/R/Param_ATE.R index 8a7f4647..05c927d5 100644 --- a/R/Param_ATE.R +++ b/R/Param_ATE.R @@ -117,7 +117,7 @@ Param_ATE <- R6Class( psi <- weighted.mean(EY1 - EY0, tmle_task$weights) IC <- HA * (Y - EY) + (EY1 - EY0) - psi - IC <- IC * weights + IC <- IC * tmle_task$weights result <- list(psi = psi, IC = IC) return(result) } diff --git a/R/helpers_point_treatment.R b/R/helpers_point_treatment.R index e385e965..837b383e 100644 --- a/R/helpers_point_treatment.R +++ b/R/helpers_point_treatment.R @@ -32,9 +32,9 @@ point_tx_task <- function(data, node_list, variable_types = NULL, scale_outcome npsem <- point_tx_npsem(node_list, variable_types, scale_outcome, include_variance_node) if (!is.null(node_list$id)) { - tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, ...) + tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, weights = node_list$weights, ...) } else { - tmle_task <- tmle3_Task$new(data, npsem = npsem, ...) + tmle_task <- tmle3_Task$new(data, npsem = npsem, weights = node_list$weights, ...) } return(tmle_task) diff --git a/R/helpers_survival.R b/R/helpers_survival.R index 42a71eb4..0fc03433 100644 --- a/R/helpers_survival.R +++ b/R/helpers_survival.R @@ -41,9 +41,9 @@ survival_tx_task <- function(data, node_list, variable_types = NULL, ...) { npsem <- survival_tx_npsem(node_list, variable_types) if (!is.null(node_list$id)) { - tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, time = node_list$time, ...) + tmle_task <- tmle3_Task$new(data, npsem = npsem, id = node_list$id, time = node_list$time, weights = node_list$weights, ...) } else { - tmle_task <- tmle3_Task$new(data, npsem = npsem, ...) + tmle_task <- tmle3_Task$new(data, npsem = npsem, weights = node_list$weights, ...) } return(tmle_task)