From 5054cc73205d8d1b8021cbe8ba342f3dbbddf944 Mon Sep 17 00:00:00 2001
From: blind-contours <david_mccoy@berkeley.edu>
Date: Fri, 5 Apr 2024 18:31:42 -0700
Subject: [PATCH] removed old effect mod functions

---
 NAMESPACE                          |   5 -
 R/final_result_utils.R             | 163 -----------------------------
 R/utils.R                          |  88 ----------------
 man/calc_final_effect_mod_param.Rd |  36 -------
 man/list_rules_party.Rd            |  21 ----
 5 files changed, 313 deletions(-)
 delete mode 100644 man/calc_final_effect_mod_param.Rd
 delete mode 100644 man/list_rules_party.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 8a05495..94c8f26 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,7 +3,6 @@
 export(InterXshift)
 export(calc_CIs)
 export(calc_basis_freq)
-export(calc_final_effect_mod_param)
 export(calc_final_ind_shift_param)
 export(calc_final_joint_shift_param)
 export(calc_intxn_results)
@@ -24,7 +23,6 @@ export(indiv_stoch_shift_est_Q)
 export(indiv_stoch_shift_est_g_exp)
 export(joint_stoch_shift_est_Q)
 export(joint_stoch_shift_est_g_exp)
-export(list_rules_party)
 export(quad_integrand_q_g_r)
 export(scale_to_original)
 export(scale_to_unit)
@@ -47,11 +45,9 @@ importFrom(data.table,setnames)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
-importFrom(dplyr,mutate)
 importFrom(dplyr,top_n)
 importFrom(foreach,"%dopar%")
 importFrom(magrittr,"%>%")
-importFrom(partykit,glmtree)
 importFrom(purrr,is_empty)
 importFrom(purrr,map)
 importFrom(rlang,":=")
@@ -66,7 +62,6 @@ importFrom(stats,cov)
 importFrom(stats,fitted)
 importFrom(stats,glm)
 importFrom(stats,lm)
-importFrom(stats,median)
 importFrom(stats,model.matrix)
 importFrom(stats,p.adjust)
 importFrom(stats,plogis)
diff --git a/R/final_result_utils.R b/R/final_result_utils.R
index fab4bc7..0ddab4a 100644
--- a/R/final_result_utils.R
+++ b/R/final_result_utils.R
@@ -107,169 +107,6 @@ calc_final_ind_shift_param <- function(tmle_fit, exposure, fold_k) {
   return(results)
 }
 
-#' Calculates the Effect Modification Shift Parameter
-#'
-#' @description This function uses a decision tree estimator to regress the difference in expected outcomes
-#'  of an exposure onto a covariate. Both the exposure and the covariate have been determined through data-adaptive methods.
-#'  If no rules are found, a median split is applied.
-#'
-#' @param tmle_fit_av TMLE results for the validation fold.
-#' @param tmle_fit_at TMLE results for the training fold.
-#' @param exposure The identified exposure variable as a \code{character} string.
-#' @param at Training dataset.
-#' @param av Validation dataset.
-#' @param effect_m_name The name of the effect modifier variable as a \code{character} string.
-#' @param fold_k The fold in which the effect modification was found.
-#' @importFrom partykit glmtree
-#' @importFrom stats median
-#' @importFrom dplyr mutate
-#' @export
-#'
-
-calc_final_effect_mod_param <- function(tmle_fit_av,
-                                        tmle_fit_at,
-                                        exposure,
-                                        at,
-                                        av,
-                                        effect_m_name,
-                                        fold_k) {
-  # Calculate pseudo_outcome for training and validation datasets
-  pseudo_outcome_at <- tmle_fit_at$qn_shift_star - tmle_fit_at$qn_noshift_star
-  pseudo_outcome_av <- tmle_fit_av$qn_shift_star - tmle_fit_av$qn_noshift_star
-
-  # Rename columns
-  names(pseudo_outcome_at) <- "pseudo_outcome_at"
-  names(pseudo_outcome_av) <- "pseudo_outcome_av"
-
-  # Prepare data for the tree model
-  em_model_data_at <- cbind.data.frame(
-    pseudo_outcome_at, subset(at,
-      select = effect_m_name
-    )
-  )
-
-  em_model_data_av <- cbind.data.frame(
-    pseudo_outcome_av, subset(av,
-      select = effect_m_name
-    )
-  )
-
-  # Create the formula for the tree model
-  response_var <- "pseudo_outcome_at"
-  formula_string <- paste(response_var, "~", effect_m_name)
-  formula <- as.formula(formula_string)
-
-  # Fit the tree model
-  tree_model <- glmtree(formula, data = em_model_data_at)
-
-  # Extract the rules from the tree model
-  rules <- list_rules_party(tree_model)
-
-  # If no rules are found, use median split
-  if (all(rules == "") == TRUE) {
-    # Determine if the effect modifier is binary (0 or 1)
-    is_binary <- all(apply(at[, ..effect_m_name], 2, function(x) {
-      all(x %in% 0:1)
-    }))
-
-    # Calculate medians for the effect modifier
-    medians <- apply(at[, ..effect_m_name], 2, median)
-
-    # Create median rules
-    median_rule_list <- lapply(seq_along(medians), function(i) {
-      if (is_binary) {
-        paste(names(medians)[i], "==", medians[i])
-      } else {
-        paste(names(medians)[i], "<=", medians[i])
-      }
-    })
-
-    rules <- median_rule_list
-  }
-
-  # Initialize results_list
-  results_list <- list()
-
-  # Loop through the rules
-  for (i in seq_along(rules)) {
-    rule <- rules[[i]]
-
-    # Apply the rule to split the data
-    em_split_data <- em_model_data_av %>%
-      dplyr::mutate(ind = ifelse(eval(parse(text = rule)), 1, 0))
-
-    # If the rule does not partition the validation sample and leads to all one value,
-    # skip to the next rule
-    if (length(unique(em_split_data$ind)) == 1) {
-      next()
-    }
-
-    # Calculate the inverse propensity weights
-    inverse_prop_positive <- ifelse(em_split_data$ind == 1,
-      1 / (table(em_split_data$ind)[[2]] /
-        length(em_split_data$ind)), 0
-    )
-
-    inverse_prop_negative <- ifelse(em_split_data$ind == 0,
-      1 / (table(em_split_data$ind)[[1]]
-      / length(em_split_data$ind)), 0
-    )
-
-    # Calculate the weighted EIFs
-    inverse_prop_eif_pos <- inverse_prop_positive * (tmle_fit_av$eif - tmle_fit_av$noshift_eif)
-    inverse_prop_eif_neg <- inverse_prop_negative * (tmle_fit_av$eif - tmle_fit_av$noshift_eif)
-
-    # Calculate the shift difference for both groups
-    diff <- tmle_fit_av$qn_shift_star - tmle_fit_av$qn_noshift_star
-    psi_em_one <- mean(diff[em_split_data$ind == 1])
-    psi_em_zero <- mean(diff[em_split_data$ind == 0])
-
-    # Calculate variance and standard error
-    psi_one_var <- var(inverse_prop_eif_pos[em_split_data$ind == 1]) /
-      table(em_split_data$ind)[[2]]
-
-    psi_zero_var <- var(inverse_prop_eif_neg[em_split_data$ind == 0]) /
-      table(em_split_data$ind)[[1]]
-
-    # Calculate confidence intervals
-    em_one_ci <- calc_CIs(psi_em_one, sqrt(psi_one_var))
-    em_zero_ci <- calc_CIs(psi_em_zero, sqrt(psi_zero_var))
-
-    # Calculate p-values
-    level_1_p_val <- calc_pvals(psi_em_one, psi_one_var)
-    level_0_p_val <- calc_pvals(psi_em_zero, psi_zero_var)
-
-    # Store results in a data frame
-    results <- data.table::data.table(
-      Condition = c(
-        paste("Level 1 Shift Diff in", rule),
-        paste("Level 0 Shift Diff in", rule)
-      ),
-      Psi = c(psi_em_one, psi_em_zero),
-      Variance = c(psi_one_var, psi_zero_var),
-      SE = c(sqrt(psi_one_var), sqrt(psi_zero_var)),
-      Lower_CI = c(em_one_ci[1], em_zero_ci[1]),
-      Upper_CI = c(em_one_ci[2], em_zero_ci[2]),
-      P_value = c(level_1_p_val, level_0_p_val),
-      Fold = fold_k,
-      Type = "Effect Mod",
-      Variables = paste(exposure, effect_m_name, sep = ""),
-      N = length(tmle_fit_av$eif)
-    )
-
-    # Add the results to the results_list
-    results_list[[i]] <- results
-  }
-
-  # Combine all results into a single data frame
-  results_df <- do.call(rbind, results_list)
-
-  # Remove duplicated results
-  results_df <- results_df[!duplicated(results_df$Psi), ]
-
-  return(results_df)
-}
-
 
 #' @title Calculates the Joint Shift Parameter
 #' @description Estimates the shift parameter for a joint shift
diff --git a/R/utils.R b/R/utils.R
index 20462b2..035e710 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -135,91 +135,3 @@ is.InterXshift <- function(x) {
 is.InterXshift <- function(x) {
   class(x) == "InterXshift_msm"
 }
-
-###################################################################
-#' @title Get rules from partykit object in rule fitting
-#' @param x Partykit glmtree model object
-#' @param i null
-#' @param ... additional arguments
-#' @return List of rules
-#'
-#' @export
-# Copied from internal partykit function
-list_rules_party <- function(x, i = NULL, ...) {
-  if (is.null(i)) {
-    i <- partykit::nodeids(x, terminal = TRUE)
-  }
-  if (length(i) > 1) {
-    ret <- sapply(i, list_rules_party, x = x)
-    names(ret) <- if (is.character(i)) {
-      i
-    } else {
-      names(x)[i]
-    }
-    return(ret)
-  }
-  if (is.character(i) && !is.null(names(x))) {
-    i <- which(names(x) %in% i)
-  }
-  stopifnot(length(i) == 1 & is.numeric(i))
-  stopifnot(i <= length(x) & i >= 1)
-  i <- as.integer(i)
-  dat <- partykit::data_party(x, i)
-  if (!is.null(x$fitted)) {
-    findx <- which("(fitted)" == names(dat))[1]
-    dat <- dat[, -(findx:ncol(dat)), drop = FALSE]
-    if (ncol(dat) == 0) {
-      dat <- x$data
-    }
-  } else {
-    dat <- x$data
-  }
-  rule <- c()
-  rec_fun <- function(node) {
-    if (partykit::id_node(node) == i) {
-      return(NULL)
-    }
-    kid <- sapply(partykit::kids_node(node), partykit::id_node)
-    whichkid <- max(which(kid <= i))
-    split <- partykit::split_node(node)
-    ivar <- partykit::varid_split(split)
-    svar <- names(dat)[ivar]
-    index <- partykit::index_split(split)
-    if (is.factor(dat[, svar])) {
-      if (is.null(index)) {
-        index <- ((1:nlevels(dat[, svar])) > partykit::breaks_split(split)) +
-          1
-      }
-      slevels <- levels(dat[, svar])[index == whichkid]
-      srule <- paste(svar, " %in% c(\"", paste(slevels,
-        collapse = "\", \"", sep = ""
-      ), "\")", sep = "")
-    } else {
-      if (is.null(index)) {
-        index <- seq_along(kid)
-      }
-      breaks <- cbind(c(-Inf, partykit::breaks_split(split)), c(
-        partykit::breaks_split(split),
-        Inf
-      ))
-      sbreak <- breaks[index == whichkid, ]
-      right <- partykit::right_split(split)
-      srule <- c()
-      if (is.finite(sbreak[1])) {
-        srule <- c(srule, paste(svar, ifelse(right, ">",
-          ">="
-        ), sbreak[1]))
-      }
-      if (is.finite(sbreak[2])) {
-        srule <- c(srule, paste(svar, ifelse(right, "<=",
-          "<"
-        ), sbreak[2]))
-      }
-      srule <- paste(srule, collapse = " & ")
-    }
-    rule <<- c(rule, srule)
-    return(rec_fun(node[[whichkid]]))
-  }
-  node <- rec_fun(partykit::node_party(x))
-  paste(rule, collapse = " & ")
-}
diff --git a/man/calc_final_effect_mod_param.Rd b/man/calc_final_effect_mod_param.Rd
deleted file mode 100644
index 8e18487..0000000
--- a/man/calc_final_effect_mod_param.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/final_result_utils.R
-\name{calc_final_effect_mod_param}
-\alias{calc_final_effect_mod_param}
-\title{Calculates the Effect Modification Shift Parameter}
-\usage{
-calc_final_effect_mod_param(
-  tmle_fit_av,
-  tmle_fit_at,
-  exposure,
-  at,
-  av,
-  effect_m_name,
-  fold_k
-)
-}
-\arguments{
-\item{tmle_fit_av}{TMLE results for the validation fold.}
-
-\item{tmle_fit_at}{TMLE results for the training fold.}
-
-\item{exposure}{The identified exposure variable as a \code{character} string.}
-
-\item{at}{Training dataset.}
-
-\item{av}{Validation dataset.}
-
-\item{effect_m_name}{The name of the effect modifier variable as a \code{character} string.}
-
-\item{fold_k}{The fold in which the effect modification was found.}
-}
-\description{
-This function uses a decision tree estimator to regress the difference in expected outcomes
-of an exposure onto a covariate. Both the exposure and the covariate have been determined through data-adaptive methods.
-If no rules are found, a median split is applied.
-}
diff --git a/man/list_rules_party.Rd b/man/list_rules_party.Rd
deleted file mode 100644
index 0baf6eb..0000000
--- a/man/list_rules_party.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils.R
-\name{list_rules_party}
-\alias{list_rules_party}
-\title{Get rules from partykit object in rule fitting}
-\usage{
-list_rules_party(x, i = NULL, ...)
-}
-\arguments{
-\item{x}{Partykit glmtree model object}
-
-\item{i}{null}
-
-\item{...}{additional arguments}
-}
-\value{
-List of rules
-}
-\description{
-Get rules from partykit object in rule fitting
-}