Merge branch 'main' of https://github.com/steno-aarhus/mediation-anal…

…ysis-course
steno-aarhus · May 28, 2024 · 613c49b · 613c49b
2 parents 92286c4 + d7588c9
commit 613c49b
Show file tree

Hide file tree

Showing 3 changed files with 162 additions and 90 deletions.
diff --git a/sessions/causal-mediation-analysis-estimation.qmd b/sessions/causal-mediation-analysis-estimation.qmd
@@ -455,37 +455,6 @@ digraph {
 }")
 ```
 
-```{r  echo=FALSE }
-# Creating The causal diagram for a mediation model
-library(DiagrammeR)
-grViz("
-digraph {
-  graph []
-  node [shape = plaintext]
-    W [label = 'SEP']
-    X [label = 'Alcohol intake']
-    M [label = 'GGT']
-    L [label = 'BMI']
-    U 
-    Y [label = 'SBP']
-  edge [minlen = 1.5]
-    X->Y 
-    X->L
-    L->Y
-    L->M
-    U->L
-    U->Y
-    X->M 
-    M->Y 
-    W->X
-    W->M
-    W->L
-    W->Y
-  { rank = same; X; M; Y }
-  { rank = min; L; w}
-}")
-```
-
 ### Time-varying confounding
 
 ```{r  echo=FALSE }
@@ -591,65 +560,109 @@ datasim <- function(n) {
   x3 <- rnorm(n, 0, 1)
   x4 <- rnorm(n, 0, 1)
   A <- rbinom(n, size = 1, prob = plogis(-1.6 + 2.5 * x2 + 2.2 * x3 + 0.6 * x4 + 0.4 * x2 * x4))
+  M <- 0.5 * A + 0.3 * x1 + rnorm(n)
   Y.1 <- rbinom(n, size = 1, prob = plogis(-0.7 + 1 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4))
   Y.0 <- rbinom(n, size = 1, prob = plogis(-0.7 + 0 - 0.15 * x1 + 0.45 * x2 + 0.20 * x4 + 0.4 * x2 * x4))
   Y <- Y.1 * A + Y.0 * (1 - A)
-  data.frame(x1, x2, x3, x4, A, Y, Y.1, Y.0)
+  data.frame(x1, x2, x3, x4, A, M, Y, Y.1, Y.0)
 }
 
 set.seed(120110) # for reproducibility
 ObsData <- datasim(n = 10000) # really large sample
 TRUE_EY.1 <- mean(ObsData$Y.1)
 TRUE_EY.1 # mean outcome under A = 1
 TRUE_EY.0 <- mean(ObsData$Y.0)
-TRUE_EY.0
+# True average causal effect
+TRUE_ACE <- TRUE_EY.1 - TRUE_EY.0
+TRUE_EY.1 - TRUE_EY.0
 TRUE_MOR <- (TRUE_EY.1 * (1 - TRUE_EY.0)) / ((1 - TRUE_EY.1) * TRUE_EY.0)
 TRUE_MOR # true marginal OR
 ```
 
-The Total effect if we use traditional model:
+Step 1: Fit the models
 
 ```{r}
-# Fit a logistic regression model predicting Y from the relevant confounders
-Q <- glm(Y ~ A + x2 + x4 + x2 * x4, data = ObsData, family = binomial)
-exp(Q$coef[2])
+##fit the mediator as a function of A and W
+mediator_model <- lm(M ~ A + x1 + x2 + x3 + x4, data = ObsData)
+##fit the outcome as a function of A, M and W
+outcome_model <- glm(Y ~ A + M + x1 + x2 + x3 + x4, data = ObsData, family = binomial)
 ```
 
-Let's use g-computation!
+Step 2: Duplicate the initial dataset in two counterfactual datasets. In
+one world, set A=1; in the other, set A=0. All other variables keep the
+original values.
 
 ```{r}
-# Copy the "actual" (simulated) data twice
+# Copy the "actual" data twice
 A1Data <- A0Data <- ObsData
-
 # In one world A equals 1 for everyone, in the other one it equals 0 for everyone
 # The rest of the data stays as is (for now)
-
 A1Data$A <- 1
 A0Data$A <- 0
+```
+
+Step 3. Apply the function from step 1 to predict each individual's
+mediator and outcome in the two counterfactual datasets.
 
-# Predict Y if A=1
-Y_A1 <- predict(Q, A1Data, type = "response")
-# Predict Y if A=0
-Y_A0 <- predict(Q, A0Data, type = "response")
+```{r}
+# Predict M if A=1
+M_A1 <- predict(mediator_model, newdata=A1Data)
+# Predict M if A=0
+M_A0 <- predict(mediator_model, newdata=A0Data)
 # Taking a look at the predictions
-data.frame(Y_A1 = head(Y_A1), Y_A0 = head(Y_A0), TRUE_Y = head(ObsData$Y)) |> round(digits = 2)
-
-# Mean outcomes in the two worlds
-pred_A1 <- mean(Y_A1)
-pred_A0 <- mean(Y_A0)
-# Marginal odds ratio
-MOR_gcomp <- (pred_A1 * (1 - pred_A0)) / ((1 - pred_A1) * pred_A0)
-# ATE (risk difference)
-RD_gcomp <- pred_A1 - pred_A0
-c(MOR_gcomp, RD_gcomp) |> round(digits = 2)
+data.frame(M_A1 = head(M_A1), M_A0 = head(M_A0))
+
+
+# Add the predicted mediator values to the respective datasets
+A1Data$M <- M_A1
+A0Data$M <- M_A0
+# Predict Y if A=1 and MA1 using M_A1
+Y_A1_M1 <- predict(outcome_model, newdata = A1Data, type = "response")
+# Predict Y if A=0 using MA0 using M_A0
+Y_A0_M0 <- predict(outcome_model, newdata = A0Data, type = "response")
+
+
+# Create dataset for A=1, M=0 and A=0, M=1 scenarios
+A1Data_M0 <- A1Data
+A0Data_M1 <- A0Data
+A1Data_M0$M <- M_A0
+A0Data_M1$M <- M_A1
+
+
+# Predict the outcomes for these scenarios
+Y_A1_M0 <- predict(outcome_model, newdata = A1Data_M0, type = "response")
+Y_A0_M1 <- predict(outcome_model, newdata = A0Data_M1, type = "response")
+data.frame(Y_A1_M1 = head(Y_A1_M1), Y_A0_M0 = head(Y_A0_M0),Y_A1_M0 = head(Y_A1_M0),Y_A0_M1 = head(Y_A0_M1))
 ```
 
-The demo shows how to estimate total effect using g-computation. If you
-would like to estimate PNDE, TNIE, PNIE, TNDE, then you also need to
-predict two potential mediators observed in a hypothetical world in
-which all individuals are intervention (A=1) or non-intervention (A=0).
+Step 4: Compute the average effects
+
+```{r}
+# Average outcome when A = 1
+mean_Y_A1 <- mean(Y_A1_M1)
+# Average outcome when A = 0
+mean_Y_A0 <- mean(Y_A0_M0)
+# Total effect
+total_effect <- mean_Y_A1 - mean_Y_A0
+# Total effect
+total_effect 
+
+# Calculate the average outcomes under counterfactual scenarios
+mean_Y_A1 <- mean(Y_A1_M1)
+mean_Y_A0 <- mean(Y_A0_M0)
+mean_Y_A1_M0 <- mean(Y_A1_M0)
+mean_Y_A0_M1 <- mean(Y_A0_M1)
+mean_Y_A1_M1 <- mean(Y_A1_M1)
+mean_Y_A0_M0 <- mean(Y_A0_M0)
+# Calculate the total effect
+gcomp_ACE <- mean_Y_A1 - mean_Y_A0
+gcomp_ACE
+# Calculate the Marginal Odds Ratio using g-computation predictions
+gcomp_MOR <- (mean_Y_A1 * (1 - mean_Y_A0)) / ((1 - mean_Y_A1) * mean_Y_A0)
+```
 
-We recommend take the advantage of R packages to do the work.
+We recommend take the advantage of R packages to do the work
+(decomposition).
 
 ```{r}
 set.seed(1)

diff --git a/sessions/causal-mediation-analysis-sensitivity-analysis.qmd b/sessions/causal-mediation-analysis-sensitivity-analysis.qmd
@@ -202,10 +202,10 @@ covariables *C* obtained from a logistic regression model.
 The bias factor is defined as **B~*mult*~(*c*)** on the multiplicative
 scale as the ratio of:
 
-1- the risk ratio (or odds ratio, with a **rare outcome**) comparing *A*
+1- The risk ratio (or odds ratio, with a **rare outcome**) comparing *A*
 = *a* and *A* = *a*^\*^ conditional on covariables *C*= *c* and
 
-2- what we would have obtained as the risk ratio (or odds ratio) had we
+2- What we would have obtained as the risk ratio (or odds ratio) had we
 been able to condition on both C and U.
 
 We now make the simplifying assumptions that **(A8.1.3)** *U* is binary
@@ -216,11 +216,11 @@ the same for those with exposure level *A* = *a* and exposure level *A*
 If these assumptions hold, we will let *γ* be the effect of *U* on *Y*
 conditional on *A* and *C* on the risk ratio scale, that is:
 
-$\frac{γ = P(Y = 1\|a, c,U = 1)}{P(Y = 1\|a, c,U = 0)}$
+$γ = \frac{ P(Y = 1\|a, c,U = 1)}{P(Y = 1\|a, c,U = 0)}$
 
 By assumption **(A8.1.2b)**
 
-$\frac{γ = P(Y=1\|a,c,U=1)}{P(Y=1\|a,c,U=0)}$
+$γ = \frac{P(Y=1\|a,c,U=1)}{P(Y=1\|a,c,U=0)}$
 
 is the same for both levels of the exposure.
 
@@ -237,19 +237,19 @@ We can use the bias formula by specifying the effect of *U* on *Y* on
 the risk ratio scale and the prevalence of *U* among those with exposure
 levels *A* = *a* and *A* = *a*^\*^.
 
-Once we have calculated the bias term B\~*mult\~*(*c*), we can estimate
-our risk ratio controlling only for *C* (if the outcome is rare, fit a
-logistic regression) and we divide our estimate by B~mult~(*c*) to get
-the corrected estimate for risk ratio—that is, what we would have
-obtained if we had adjusted for *U* as well.
+Once we have calculated the bias term **B~*mult*~(*c*)**, we can
+estimate our risk ratio controlling only for *C* (if the outcome is
+rare, fit a logistic regression) and we divide our estimate by
+**B~*mult*~(*c*)** to get the corrected estimate for risk ratio—that is,
+what we would have obtained if we had adjusted for *U* as well.
 
 Under the simplifying assumptions of (A8.1.1) and (A8.1.2b), we can also
 obtain corrected confidence intervals by dividing both limits of the
-confidence interval by B\~*mult\~*(*c*).
+confidence interval by **B~*mult*~(*c*)**.
 
 Note that to use the bias factor in (8.2), we must specify the
-prevalence of the unmeasured confounder in both exposure groups *P*(*U*
-= 1\|*a*, *c*) and *P*(*U* = 1\|*a*^\*^, *c*), not just the difference
+prevalence of the unmeasured confounder in both exposure groups *P*(*U*=
+1\|*a*, *c*) and *P*(*U* = 1\|*a*^\*^, *c*), not just the difference
 between these two prevalences as in (8.1) for outcomes on the additive
 scale.
 
@@ -261,7 +261,9 @@ explain away an effect and also
 2- the sensitivity analysis parameters that would be required to shift
 the confidence interval to just include the null.
 
-## Sensitivity analysis for controled direct effects for a continuous outcome
+## Sensitivity analysis for controled direct effects
+
+### Continuous outcomes
 
 Assume that controlling for (*C,U*) would suffice to control for
 exposure--outcome and mediator--outcome confounding but that no data are
@@ -299,7 +301,7 @@ If we have not adjusted for *U*, then our estimates controlling only for
 We will consider estimating the controlled direct effect, *CDE*(*m*),
 with the mediator fixed to *m* conditional on the covariables *C* = *c*.
 
-Let \*B\^{CDE}\_{add}*(*m*\|*c\*) denote the difference between:
+Let $B^{CDE}_{add}(m|c)$ denote the difference between:
 
 1- the estimate of the *CDE* conditional on *C*
 
@@ -332,11 +334,11 @@ Under assumptions (A8.1.1) and (A8.2.2b), the bias factor is simply
 given by the product of these two sensitivity-analysis parameters
 (VanderWeele, 2010a):
 
-\*B\^{CDE}\_{add}*(*m*\|*c*) =* δmγm\*
+$B^{CDE}_{add}(m|c) = δmγm$ **8.3**
 
 This formula states that under assumptions (A8.1.1) and (A8.2.2b) the
-bias factor B\^{CDE}\_{add}(*m*\|*c*) for the *CDE*(*m*) is simply given
-by the product *δmγm*.
+bias factor $B^{CDE}_{add}(m|c)$ for the *CDE*(*m*) is simply given by
+the product *δmγm*.
 
 Under these simplifying assumptions, this gives rise to a particularly
 simple sensitivity analysis technique for assessing the sensitivity of
@@ -359,10 +361,9 @@ on *Y*) and by varying *δm*, interpreted as the prevalence difference of
 *U*, comparing exposure levels *a* and *a*^\*^ conditional on *M* = *m*
 and *C* = *c*.
 
-We can subtract the bias factor \*B\^{CDE}\_{add}*(*m*\|*c*) =* δmγm\*
-from the observed estimate to obtain a corrected estimate of the effect
-(what we would have obtained had it been possible to adjust for *U* as
-well).
+We can subtract the bias factor $B^{CDE}_{add}(m|c)$ from the observed
+estimate to obtain a corrected estimate of the effect (what we would
+have obtained had it been possible to adjust for *U* as well).
 
 Under the simplifying assumptions (A8.1.1) and (A8.2.2b), we could also
 subtract this bias factor from both limits of a confidence interval to
@@ -376,6 +377,54 @@ If there is no interaction between the effects of *A* and *M* on *Y*,
 then this simple sensitivity analysis technique based on using formula
 above will also be applicable to natural direct effects as well.
 
+### Binary outcomes
+
+We will consider estimating the controlled direct effect odds ratio from
+Chapter 2, $OR^{CDE}(m)$, with the mediator fixed at level *m*,
+conditional on the covariates *C* = *c*.
+
+This approach will assume a rare outcome but can also be used for risk
+ratios with a common outcome. Let $B^{CDE}_{mult}(m|c)$ denote the ratio
+of
+
+1- The estimate of the controlled direct effect conditional on *C* (
+
+2- What would have been obtained had adjustment been made for *U* as
+well.
+
+Suppose that (A8.1.1) *U* is binary and that (A8.1.2d) the effect of *U*
+on *Y* on the ratio scale, conditional on exposure, mediator, and
+covariables (*A,M,C*), is the same for both exposure levels *A = a* and
+*a*^\*^.
+
+Let *γm* be the effect of *U* on *Y* conditional on *A, C*, and *M = m*,
+that is:
+
+$γm = \frac{P(Y = 1|a, c,m,U = 1)}{P(Y = 1|a, c,m,U = 0)}$
+
+Note that by (A8.1.2), *γm* is the same for both levels of the exposure
+of interest.
+
+Under assumptions (A8.1.1) and (A8.1.2d), the bias factor on the
+multiplicative scale is given by:
+
+$B^{CDE}_{mult}(m|c) = \frac{1+(γm −1)P(U= 1|a,m, c)}{1+(γm−1)P(U = 1\|a∗,m, c)}$
+**(8.4)**
+
+Once we have calculated the bias term $B^{CDE}_{mult}(m|c)$, we can
+estimate the *CDE* risk ratio controlling only for *C* (if the outcome
+is rare), we fit a logistic regression) and we divide our estimate and
+confidence intervals by the bias factor $B^{CDE}_{mult}(m|c)$ to get the
+corrected estimate for CDE risk ratio and its confidence interval—that
+is, what we would have obtained if we had adjusted for *U* a well.
+
+We have to specify the two prevalences of U, namely $P(U = 1|a,m, c)$
+and $P(U = 1|a∗,m, c)$, in the different exposure groups conditional on
+*M* and *C*.
+
+As with *CDE* on an additive scale, the issue of conditioning on *M* in
+the interpretation of these prevalences is important
+
 ## Sensitivity analysis for natural direct and indirect effects
 
 ### Sensitivity analysis for natural direct and indirect effects in the abscence of exposure-mediator interaction
@@ -429,24 +478,35 @@ Because a mediator--outcome confounder does not confound the
 exposure-outcome relationship, we can still obtain valid estimates of
 the total effect.
 
-And, it turns out that the combination of the DE and IE do constitute a
-consistent estimator of the total effect, even though the *DE* and *IE*
-estimators will themselves be biased for the true *NDE* and *NIE*.
+And, it turns out that the combination of the *DE* and *IE* do
+constitute a consistent estimator of the total effect, even though the
+*DE* and *IE* estimators will themselves be biased for the true *NDE*
+and *NIE*.
 
-Knowing that the DE and IE estimates combine to a valid estimate of the
-total effect then allows us to employ the sensitivity analysis
+Knowing that the *DE* and *IE* estimates combine to a valid estimate of
+the total effect then allows us to employ the sensitivity analysis
 techniques for *CDE* for *NIE* as well.
 
-To do so, we use the negation (on the additive scale) of the bias
-formulas that we used for *CDE* (and *NDE*). Thus on the additive scale,
-for a continuous outcome, our bias factor for the *NDE* would simply be:
+To do so, we use the negation (on the additive scale) or the inverse (on
+the multiplicative ratio scale) of the bias formulas used for *CDE* (and
+*NDE*). Thus on the additive scale, for a continuous outcome, our bias
+factor for the *NDE* would simply be:
 
 −*δmγm*
 
 and we could subtract this from the estimate and both limits of the
 confidence interval to obtain a corrected estimate and confidence
 interval for the *NIE*.
 
+For a binary outcome, on the odds ratio scale with rare outcome or risk
+ratio scale with common outcome, our bias factor for the *NIE* would be
+the inverse of that in **(8.4)**:
+
+$\frac{1+(γm−1)P(U=1|a∗,m,c)}{1+(γm−1)P(U=1|a,m,c)}$
+
+and we could divide our *NIE* estimates and its confidence interval by
+this bias factor to obtain a corrected estimate and confidence interval.
+
 We first load the nhanes data:
 
 ```{r}

diff --git a/sessions/causal-mediation-analysis-survival-outcomes.qmd b/sessions/causal-mediation-analysis-survival-outcomes.qmd
@@ -178,7 +178,6 @@ res_rb_coxph <- cmest(
   estimation = "imputation", inference = "bootstrap"
 )
 
-
 summary(res_rb_coxph)
 ```
-Original file line number
+Diff line change
@@ Expand Up / @@ -178,7 +178,6 @@ res_rb_coxph <- cmest( @@
       estimation = "imputation", inference = "bootstrap"
     )
     summary(res_rb_coxph)
     ```
@@ Expand Down @@