03-analyses.Rmd

---
title: "CMA"
author: "Paloma Cárcamo"
output: html_document
---

```{r setup, include=FALSE}
# Show the code of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Install pacman only if it is missing. requireNamespace() tests availability
# without attaching the package; attaching is unnecessary because pacman is
# only called through the `pacman::` prefix.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# Attach the packages used below, installing any that are missing.
# NOTE(review): CMAverse appears to be distributed through GitHub rather than
# CRAN, so p_load() may not be able to install it on a fresh machine - confirm.
pacman::p_load(tidyverse, CMAverse, gtsummary)
```

## Load data

```{r}
# Read the cleaned, joined data set.
data_raw <- read_rds("data/joined-db-clean.rds")

# Derive every recoded covariate once, so that the complete-case data set and
# the all-observations data set are guaranteed to use identical definitions.
data_derived <- data_raw |>
  mutate(
    # Bins are [0, 1], (1, 3] and (3, 100]; the last bin therefore holds four
    # or more children and is labelled "4+" so it does not overlap "2-3".
    # (Assumes `children` is a whole-number count.)
    parity = cut(
      children,
      breaks = c(0, 1, 3, 100),
      labels = c("0-1", "2-3", "4+"),
      include.lowest = TRUE
    ),
    # Income quartiles, with cut points taken from all non-missing incomes in
    # the raw data.
    inc_cat = cut(
      income,
      breaks = quantile(data_raw$income, c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE),
      labels = c("Q1", "Q2", "Q3", "Q4"),
      include.lowest = TRUE
    ),
    # Left-closed bins [0, 25), [25, 30), [30, 1000): a BMI of exactly 25 is
    # overweight and exactly 30 is obesity, matching the usual cut-offs.
    bmi_cat = cut(
      bmi,
      breaks = c(0, 25, 30, 1000),
      labels = c("Normal or underweight", "Overweight", "Obesity"),
      right = FALSE
    ),
    # Indicator / grouping variables are stored as factors.
    across(c(smoking, mi, oc_use, hbp, hbp_med, cohort), factor),
    # Absolute difference between the OC start and stop years; abs() covers
    # records where the two years were entered in reverse order.
    length_ocuse = abs(oc_yearstop - oc_yearstart)
  )

# Complete-case analysis data: rows with a missing value in any of the
# selected columns are dropped.
data <- data_derived |>
  select(
    age, mi, oc_use, hbp, cohort, smoking, inc_cat, parity, bmi_cat, hbp_med,
    children, income, bmi
  ) |>
  na.omit()

# Same recoding, but keeping rows with missing values.
data_withnas <- data_derived |>
  select(
    mi, oc_use, hbp, cohort, smoking, inc_cat, parity, bmi_cat, hbp_med,
    length_ocuse
  )
```

## Table 1: Baseline characteristics

```{r}
## Complete cases: characteristics by OC use, with p-values and an
## overall column
data |>
  mutate(
    oc_use = factor(oc_use, levels = c(1, 0), labels = c("OC use", "No OC use"))
  ) |>
  tbl_summary(by = oc_use) |>
  add_p() |>
  add_overall()

## All observations (rows with missing values retained): same layout
data_withnas |>
  mutate(
    oc_use = factor(oc_use, levels = c(1, 0), labels = c("OC use", "No OC use"))
  ) |>
  tbl_summary(by = oc_use) |>
  add_p() |>
  add_overall()
```

## Table 2: Conditional and marginal models

### Crude outcome regression

```{r}
# Unadjusted logistic regression of MI on OC use (complete cases).
mod1 <- glm(
  mi ~ oc_use,
  family = binomial,
  data = data
)

summary(mod1)

# Odds ratios with the limits returned by confint(). Exponentiating the two
# pieces separately yields the same table as exponentiating the bound matrix.
cbind(OR = exp(coef(mod1)), exp(confint(mod1)))
```

### Adjusted outcome regression

```{r}
# Logistic regression of MI on OC use, adjusted for cohort, smoking,
# income quartile and parity (complete cases).
mod2 <- glm(
  mi ~ oc_use + cohort + smoking + inc_cat + parity,
  family = binomial,
  data = data
)

summary(mod2)

# Odds ratios with the limits returned by confint(). Exponentiating the two
# pieces separately yields the same table as exponentiating the bound matrix.
cbind(OR = exp(coef(mod2)), exp(confint(mod2)))
```

### IPTW

```{r}
# Propensity score model: probability of OC use given cohort, smoking, income
# quartile and parity. Its fitted values are the weight denominators.
mod1PS <- glm(
  oc_use ~ cohort + smoking + inc_cat + parity,
  family = binomial,
  data = data
)

summary(mod1PS)

data$ps <- predict(mod1PS, type = "response")

# Intercept-only model: marginal probability of OC use, used as the numerator
# of the stabilized weights.
fitpn_oc <- glm(oc_use ~ 1, family = binomial, data = data)

summary(fitpn_oc)

data$pn_os <- predict(fitpn_oc, type = "response")

# Stabilized weights: P(A = a) / P(A = a | covariates) for the exposure level
# each person actually had.
data <- data |>
  mutate(sw_t = if_else(oc_use == 1, pn_os / ps, (1 - pn_os) / (1 - ps)))

summary(data$sw_t)

# Weighted outcome model. quasibinomial gives the same coefficients as
# binomial but does not warn about the non-integer weights.
fitsw <- glm(mi ~ oc_use, family = quasibinomial, data = data, weights = sw_t)

summary(fitsw)

# The model-based standard errors treat the weights as known precision
# weights, which is not appropriate for inverse-probability weights. Use a
# robust (sandwich, HC0) variance instead:
#   bread = (X' W X)^-1, meat = sum of outer products of the weighted scores.
# NOTE(review): this treats the estimated weights as fixed; bootstrap the
# whole procedure if the propensity-model uncertainty should be reflected.
design <- model.matrix(fitsw)
score <- design * (weights(fitsw, type = "prior") * (fitsw$y - fitted(fitsw)))
bread <- summary(fitsw)$cov.unscaled
vcov_robust <- bread %*% crossprod(score) %*% bread
se_robust <- sqrt(diag(vcov_robust))
z_crit <- qnorm(0.975)

# Odds ratios with 95% robust Wald confidence limits.
cbind(
  OR = exp(coef(fitsw)),
  `2.5 %` = exp(coef(fitsw) - z_crit * se_robust),
  `97.5 %` = exp(coef(fitsw) + z_crit * se_robust)
)
```

## Table 3: Causal mediation analysis

```{r}
# Fix the random seed so the bootstrap results below are reproducible.
set.seed(11112)

# Causal mediation analysis with CMAverse::cmest() using model = "msm":
# exposure = OC use, mediator = high blood pressure, outcome = MI, contrasting
# a = 1 with astar = 0. The regressions are passed as fitted glm objects.
#
# NOTE(review): `basec` lists bmi_cat and hbp_med, but `ereg` adjusts for
# neither and `wmdenomreg` omits hbp_med - confirm the covariate sets are
# intentional. If bmi_cat / hbp_med are meant as exposure-induced
# mediator-outcome confounders, check whether they belong in cmest()'s
# `postc` argument instead - verify against the CMAverse documentation.
# NOTE(review): these models adjust for the numeric `children`, whereas the
# outcome and propensity models earlier in this file use categorical `parity`.
res_msm <- cmest(data = data,
                 model = "msm", 
                 outcome = "mi", 
                 exposure = "oc_use",
                 mediator = "hbp", 
                 basec = c("cohort", "smoking", "inc_cat", "children", "bmi_cat", "hbp_med"), 
                 EMint = TRUE,
                 
                 ereg =  glm(oc_use ~ cohort + smoking + inc_cat + children, family = binomial, data = data), # Exposure regression for weighting (IPW for exposure)
                 
                 # NOTE(review): the numerator model is intercept-only; the CMAverse
                 # documentation appears to describe it as mediator ~ exposure - confirm.
                 wmnomreg = list(glm(hbp ~ 1, family = binomial, data = data)), # IPW for mediator (numerator)
                 
                 wmdenomreg = list(glm(hbp ~ oc_use + cohort + smoking + inc_cat + children + bmi_cat, family = binomial, data = data)), # IPW for mediator (denominator)
                 
                 # `oc_use*hbp` already expands to both main effects plus their
                 # interaction, so the explicit main-effect terms are redundant but harmless.
                 yreg = glm(mi ~ oc_use + hbp + oc_use*hbp, family = binomial, data = data),   # Outcome regression for CDE
                 
                 mreg = list(glm(hbp ~ oc_use, family = binomial, data = data)), # Mediator regression
                 
                 astar = 0, 
                 a = 1, 
                 # Presumably the mediator level (hbp = 0) at which the controlled
                 # direct effect is evaluated - confirm against the cmest() reference.
                 mval = list(0), 
                 estimation = "imputation", 
                 # Bootstrap inference with 1000 resamples.
                 inference = "bootstrap", 
                 nboot = 1000)

summary(res_msm)
```

## HBP and MI

```{r}
# Logistic regression of MI on high blood pressure, fitted on the
# complete-case data (m1) and on the data that keeps rows with missing
# values (m2).
m1 <- glm(
  mi ~ hbp,
  family = binomial,
  data = data
)
m2 <- glm(
  mi ~ hbp,
  family = binomial,
  data = data_withnas
)

summary(m1)
summary(m2)

# Odds ratios with the limits returned by confint(). Exponentiating the two
# pieces separately yields the same tables as exponentiating the bound matrix.
cbind(OR = exp(coef(m1)), exp(confint(m1)))
cbind(OR = exp(coef(m2)), exp(confint(m2)))
```