diff --git a/DESCRIPTION b/DESCRIPTION index 06f4f69..9f455ca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,11 @@ Imports: purrr, polspline, pracma, - haldensify + haldensify, + ranger, + arm, + xgboost, + nnls Suggests: rmarkdown, knitr, diff --git a/R/IsoXshift.R b/R/IsoXshift.R index eea0373..5524cf1 100644 --- a/R/IsoXshift.R +++ b/R/IsoXshift.R @@ -104,7 +104,7 @@ IsoXshift <- function(w, num_cores = 2, seed = seed, hn_trunc_thresh = 10, - top_n = 2) { + top_n = 1) { # coerce W to matrix and, if no names in W, assign them generically if (!is.data.frame(w)) w <- as.data.frame(w) @@ -190,7 +190,7 @@ IsoXshift <- function(w, pie_min_effort_shift <- list() - fold_InterXshift_results <- furrr::future_map( + fold_IsoXshift_results <- furrr::future_map( unique(data_internal$folds), function(fold_k) { fold_intxn_results <- fold_basis_results[[fold_k]] @@ -295,7 +295,7 @@ IsoXshift <- function(w, .options = furrr::furrr_options(seed = seed, packages = "IsoXshift") ) - fold_min_shift_results <- unlist(fold_InterXshift_results, recursive = FALSE) + fold_min_shift_results <- unlist(fold_IsoXshift_results, recursive = FALSE) pooled_synergy_shift_results <- calc_pooled_intxn_shifts( diff --git a/R/joint_stoch_shift_est_g_exp.R b/R/joint_stoch_shift_est_g_exp.R index a0c6abf..bbf50f9 100644 --- a/R/joint_stoch_shift_est_g_exp.R +++ b/R/joint_stoch_shift_est_g_exp.R @@ -115,7 +115,7 @@ joint_stoch_shift_est_g_exp <- function(exposures, ) obs_pred_shifted <- g_model$predict(task_obs_shift_rep) - ratio <- obs_pred_no_shifted$likelihood / obs_pred_shifted$likelihood + ratio <- obs_pred_no_shifted / obs_pred_shifted delta_diff <- abs(grid1 - delta) replicated_data$delta_diff <- delta_diff replicated_data$ratio <- ratio @@ -125,8 +125,8 @@ joint_stoch_shift_est_g_exp <- function(exposures, # Find the row with the minimum delta_diff within the filtered data min_diff_row <- filtered_data[which.min(filtered_data$delta_diff), ] - min_diff_row$likelihood_shift <- 
obs_pred_shifted$likelihood[as.numeric(rownames(min_diff_row))] - min_diff_row$likelihood_no_shift <- obs_pred_no_shifted$likelihood + min_diff_row$likelihood_shift <- obs_pred_shifted[as.numeric(rownames(min_diff_row))] + min_diff_row$likelihood_no_shift <- obs_pred_no_shifted min_diff_row$delta <- min_diff_row[, exposure] - obs_data[, exposure] aggregate_results <- rbind(aggregate_results, min_diff_row) diff --git a/README.Rmd b/README.Rmd index f686f2d..1350882 100644 --- a/README.Rmd +++ b/README.Rmd @@ -49,7 +49,7 @@ We define interaction as the counterfactual mean of the outcome under stochastic To utilize the package, users need to provide vectors for exposures, covariates, and outcomes. They also specify the target_outcome_lvl for the outcome, epsilon, which is some allowed closeness to the target. For example, if the target outcome level is 15, and epsilon is 0.5, then interventions that lead to 15.5 are considered. The restriction limit is hn_trunc_thresh which is the allowed distance from the original exposure likelihood. 10 for example indicates that the likelihood should not be more than x10 difference from the original exposure level likelihood. That is, if an individual's likelihood is originally 0.1 given their covariate history and the likelihood of exposure to the intervened level is 0.01, this is 10 times different and would be the limit intervention. A detailed guide is provided in the vignette. With these inputs, `IsoXshift` processes the data and delivers tables showcasing fold-specific results and aggregated outcomes, allowing users to glean insights effectively. -`IsoXshift` also incorporates features from the `sl3` package [@coyle-sl3-rpkg], facilitating ensemble machine learning in the estimation process. If the user does not specify any stack parameters, `InterXshift` will automatically create an ensemble of machine learning algorithms that strike a balance between flexibility and computational efficiency. 
+`IsoXshift` also incorporates features from the `sl3` package [@coyle-sl3-rpkg], facilitating ensemble machine learning in the estimation process. If the user does not specify any stack parameters, `IsoXshift` will automatically create an ensemble of machine learning algorithms that strike a balance between flexibility and computational efficiency. --- @@ -123,11 +123,11 @@ sim_results <- IsoXshift( w = w, a = a, y = y, - n_folds = 5, + n_folds = 6, num_cores = 6, outcome_type = "continuous", seed = seed, - target_outcome_lvl = 15, + target_outcome_lvl = 12, epsilon = 0.5 ) proc.time() - ptm diff --git a/README.md b/README.md index d7c3cf7..eaf8e87 100644 --- a/README.md +++ b/README.md @@ -95,9 +95,9 @@ insights effectively. `IsoXshift` also incorporates features from the `sl3` package (Coyle, Hejazi, Malenica, et al. 2022), facilitating ensemble machine learning in the estimation process. If the user does not specify any stack -parameters, `InterXshift` will automatically create an ensemble of -machine learning algorithms that strike a balance between flexibility -and computational efficiency. +parameters, `IsoXshift` will automatically create an ensemble of machine +learning algorithms that strike a balance between flexibility and +computational efficiency. ------------------------------------------------------------------------ @@ -108,27 +108,10 @@ that allows ensemble machine learning to be used for nuisance parameter estimation and `sl3` is not on CRAN the `IsoXshift` package is not available on CRAN and must be downloaded here. -There are many depedencies for `IsoXshift` so it’s easier to break up -installation of the various packages to ensure proper installation. - -First install the basis estimators used in the data-adaptive variable -discovery of the exposure and covariate space: - -``` r -install.packages("earth") -install.packages("hal9001") -``` - `IsoXshift` uses the `sl3` package to build ensemble machine learners for each nuisance parameter. 
We have to install off the development branch, first download these two packages for `sl3` -``` r -install.packages(c("ranger", "arm", "xgboost", "nnls")) -``` - -Now install `sl3` on devel: - ``` r remotes::install_github("tlverse/sl3@devel") ``` @@ -139,13 +122,6 @@ Make sure `sl3` installs correctly then install `IsoXshift` remotes::install_github("blind-contours/IsoXshift@main") ``` -`IsoXshift` has some other miscellaneous dependencies that are used in -the examples as well as in the plotting functions. - -``` r -install.packages(c("kableExtra", "hrbrthemes", "viridis")) -``` - ------------------------------------------------------------------------ ## Example @@ -443,7 +419,7 @@ W Let’s look at the interactions built into this synthetic data: @@ -459,77 +435,91 @@ sim_results <- IsoXshift( w = w, a = a, y = y, - n_folds = 5, + n_folds = 6, num_cores = 6, outcome_type = "continuous", seed = seed, - target_outcome_lvl = 15, + target_outcome_lvl = 12, epsilon = 0.5 ) +#> Growing trees.. Progress: 47%. Estimated remaining time: 1 minute, 36 seconds. 
#> -#> Iter: 1 fn: 191.9358 Pars: 0.0000003908 0.9999996094 -#> Iter: 2 fn: 191.9358 Pars: 0.0000001602 0.9999998398 +#> Iter: 1 fn: 222.8222 Pars: 0.02601 0.97399 +#> Iter: 2 fn: 222.8222 Pars: 0.02601 0.97399 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 448.1996 Pars: 0.03804 0.96196 -#> Iter: 2 fn: 448.1996 Pars: 0.03804 0.96196 +#> Iter: 1 fn: 494.9785 Pars: 0.000007745 0.999992255 +#> Iter: 2 fn: 494.9785 Pars: 0.00000003548 0.99999996452 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 461.3076 Pars: 0.22580 0.77420 -#> Iter: 2 fn: 461.3076 Pars: 0.22580 0.77420 +#> Iter: 1 fn: 487.6227 Pars: 0.999994298 0.000005701 +#> Iter: 2 fn: 487.6227 Pars: 0.9999991589 0.0000008411 +#> Iter: 3 fn: 487.6227 Pars: 0.9999994859 0.0000005141 +#> solnp--> Completed in 3 iterations +#> +#> Iter: 1 fn: 226.2807 Pars: 0.05745 0.94255 +#> Iter: 2 fn: 226.2807 Pars: 0.05745 0.94255 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 180.8586 Pars: 0.02992 0.97008 -#> Iter: 2 fn: 180.8586 Pars: 0.02991 0.97009 +#> Iter: 1 fn: 484.0375 Pars: 0.0000005784 0.9999994218 +#> Iter: 2 fn: 484.0375 Pars: 0.0000001083 0.9999998917 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 466.2345 Pars: 0.003087 0.996913 -#> Iter: 2 fn: 466.2345 Pars: 0.002355 0.997645 +#> Iter: 1 fn: 479.8223 Pars: 0.37822 0.62178 +#> Iter: 2 fn: 479.8223 Pars: 0.37820 0.62180 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 464.6877 Pars: 0.999994892 0.000005108 -#> Iter: 2 fn: 464.6877 Pars: 0.999998399 0.000001601 +#> Iter: 1 fn: 221.0642 Pars: 0.00000005466 0.99999994546 +#> Iter: 2 fn: 221.0642 Pars: 0.00000003637 0.99999996363 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 199.3252 Pars: 0.00000004247 0.99999995594 -#> Iter: 2 fn: 199.3252 Pars: 0.0000000009188 0.9999999990812 -#> Iter: 3 fn: 199.3252 Pars: 0.0000000005678 0.9999999994322 -#> solnp--> Completed in 3 iterations +#> Iter: 1 fn: 496.3027 Pars: 0.21429 0.78571 +#> Iter: 2 fn: 496.3027 Pars: 
0.21398 0.78602 +#> solnp--> Completed in 2 iterations +#> +#> Iter: 1 fn: 491.9260 Pars: 0.0000125 0.9999875 +#> Iter: 2 fn: 491.9260 Pars: 0.000007487 0.999992513 +#> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 453.2732 Pars: 0.06791 0.93209 -#> Iter: 2 fn: 453.2732 Pars: 0.06788 0.93212 +#> Iter: 1 fn: 104.6614 Pars: 0.08134 0.91866 +#> Iter: 2 fn: 104.6614 Pars: 0.08134 0.91866 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 451.6012 Pars: 0.05533 0.94467 -#> Iter: 2 fn: 451.6012 Pars: 0.05532 0.94468 +#> Iter: 1 fn: 495.9732 Pars: 0.41785 0.58215 +#> Iter: 2 fn: 495.9732 Pars: 0.41785 0.58215 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 222.6728 Pars: 0.000000003476 0.999999995710 -#> Iter: 2 fn: 222.6728 Pars: 0.000000002283 0.999999997717 +#> Iter: 1 fn: 495.9761 Pars: 0.9999993936 0.0000006071 +#> Iter: 2 fn: 495.9761 Pars: 0.9999996437 0.0000003563 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 453.2119 Pars: 0.12281 0.87719 -#> Iter: 2 fn: 453.2119 Pars: 0.12282 0.87718 +#> Iter: 1 fn: 220.9891 Pars: 0.00000000428 0.99999999571 +#> Iter: 2 fn: 220.9891 Pars: 0.00000000105 0.99999999895 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 459.0837 Pars: 0.40415 0.59585 -#> Iter: 2 fn: 459.0837 Pars: 0.40415 0.59585 +#> Iter: 1 fn: 496.7959 Pars: 0.32776 0.67224 +#> Iter: 2 fn: 496.7959 Pars: 0.32777 0.67223 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 224.4303 Pars: 0.05369 0.94631 -#> Iter: 2 fn: 224.4303 Pars: 0.05369 0.94631 +#> Iter: 1 fn: 498.7841 Pars: 0.99999888 0.00000112 +#> Iter: 2 fn: 498.7841 Pars: 0.9999997064 0.0000002936 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 484.9169 Pars: 0.37866 0.62134 -#> Iter: 2 fn: 484.9169 Pars: 0.37866 0.62134 +#> Iter: 1 fn: 235.0054 Pars: 0.03661 0.96339 +#> Iter: 2 fn: 235.0054 Pars: 0.03661 0.96339 #> solnp--> Completed in 2 iterations #> -#> Iter: 1 fn: 488.1615 Pars: 0.99999415 0.00000585 -#> Iter: 2 fn: 488.1614 Pars: 0.9999997443 
0.0000002557 +#> Iter: 1 fn: 492.8295 Pars: 0.999995555 0.000004445 +#> Iter: 2 fn: 492.8295 Pars: 0.999998947 0.000001053 +#> Iter: 3 fn: 492.8295 Pars: 0.9999995304 0.0000004696 +#> solnp--> Completed in 3 iterations +#> +#> Iter: 1 fn: 491.7396 Pars: 0.9999963 0.0000037 +#> Iter: 2 fn: 491.7396 Pars: 0.999997994 0.000002006 #> solnp--> Completed in 2 iterations proc.time() - ptm -#> user system elapsed -#> 61.051 3.082 920.514 +#> user system elapsed +#> 89.792 5.104 1173.784 oracle_parameter <- sim_results$`Oracle Pooled Results` k_fold_results <- sim_results$`K-fold Results` @@ -588,25 +578,25 @@ Fold
intro_IsoXshift.Rmd
McCoy D (2024). -IsoXshift: Semi-Parametric Identification and Estimation of Interaction using Stochastic Interventions. +IsoXshift: Isobolic Identification and Estimation of Interaction using Stochastic Interventions. R package version 1.0.1.
@Manual{, - title = {IsoXshift: Semi-Parametric Identification and Estimation of Interaction using Stochastic Interventions}, + title = {IsoXshift: Isobolic Identification and Estimation of Interaction using Stochastic Interventions}, author = {David McCoy}, year = {2024}, note = {R package version 1.0.1}, diff --git a/docs/contributing.html b/docs/contributing.html index 7086ce6..d7663e1 100644 --- a/docs/contributing.html +++ b/docs/contributing.html @@ -47,7 +47,7 @@Contributing to IsoXshift
We love your input! We want to make contributing to this project as easy and transparent as possible, whether it’s:
IsoXshift
?We then estimate the impact of our “intention to intervene” using CV-TMLE. Using this oracle point parameter as our target, we shift individuals as close as possible to this level without violating the density ratio (the intervention-level exposure likelihood compared to the observed-level likelihood). Thus, each individual’s actual intervention is different but is aimed towards the target, hence “intention to intervene”.
We define interaction as the counterfactual mean of the outcome under stochastic interventions of two exposures compared to the additive counterfactual mean of the two exposures intervened on independently. These interventions or exposure changes depend on naturally observed values, as described in past literature (Díaz and van der Laan 2012; Haneuse and Rotnitzky 2013), but with our new parameter in mind. Thus, what is estimated is like asking what the expected outcome would be if we were to enforce the most efficient policy intervention in a realistic setting where not everyone can actually receive that exact exposure level or levels.
To utilize the package, users need to provide vectors for exposures, covariates, and outcomes. They also specify the target_outcome_lvl for the outcome, epsilon, which is some allowed closeness to the target. For example, if the target outcome level is 15, and epsilon is 0.5, then interventions that lead to 15.5 are considered. The restriction limit is hn_trunc_thresh which is the allowed distance from the original exposure likelihood. 10 for example indicates that the likelihood should not be more than x10 difference from the original exposure level likelihood. That is, if an individual’s likelihood is originally 0.1 given their covariate history and the likelihood of exposure to the intervened level is 0.01, this is 10 times different and would be the limit intervention. A detailed guide is provided in the vignette. With these inputs, IsoXshift
processes the data and delivers tables showcasing fold-specific results and aggregated outcomes, allowing users to glean insights effectively.
IsoXshift
also incorporates features from the sl3
package (Coyle, Hejazi, Malenica, et al. 2022), facilitating ensemble machine learning in the estimation process. If the user does not specify any stack parameters, InterXshift
will automatically create an ensemble of machine learning algorithms that strike a balance between flexibility and computational efficiency.
IsoXshift
also incorporates features from the sl3
package (Coyle, Hejazi, Malenica, et al. 2022), facilitating ensemble machine learning in the estimation process. If the user does not specify any stack parameters, IsoXshift
will automatically create an ensemble of machine learning algorithms that strike a balance between flexibility and computational efficiency.
Note: Because the IsoXshift
package (currently) depends on sl3
that allows ensemble machine learning to be used for nuisance parameter estimation and sl3
is not on CRAN the IsoXshift
package is not available on CRAN and must be downloaded here.
There are many depedencies for IsoXshift
so it’s easier to break up installation of the various packages to ensure proper installation.
First install the basis estimators used in the data-adaptive variable discovery of the exposure and covariate space:
-
-install.packages("earth")
-install.packages("hal9001")
IsoXshift
uses the sl3
package to build ensemble machine learners for each nuisance parameter. We have to install off the development branch, first download these two packages for sl3
-install.packages(c("ranger", "arm", "xgboost", "nnls"))
Now install sl3
on devel:
+remotes::install_github("tlverse/sl3@devel")
Make sure
-sl3
installs correctly then installIsoXshift
+-remotes::install_github("blind-contours/IsoXshift@main")
-
IsoXshift
has some other miscellaneous dependencies that are used in the examples as well as in the plotting functions.-install.packages(c("kableExtra", "hrbrthemes", "viridis"))
Example
To illustrate how
-IsoXshift
may be used to ascertain the effect of a mixed exposure, we will use synthetic data from the National Institute of Environmental Health. Let’s first load the relevant packages:+library(IsoXshift) library(devtools) #> Loading required package: usethis @@ -126,9 +115,9 @@
Example seed <- 429153 set.seed(seed)
We will directly use synthetic data from the NIEHS used to test new mixture methods. This data has built-in strong positive and negative marginal effects and certain interactions. It can be found here: https://github.com/niehs-prime/2015-NIEHS-MIxtures-Workshop
-
Let’s look at the interactions built into this synthetic data:
-