geex
vignettes/v06_causal_example.Rmd
v06_causal_example.Rmd
##
## Attaching package: 'geex'
## The following object is masked from 'package:methods':
##
## show
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
TODO: describe what’s going on here
n <- 1000
x <- data_frame(
A = rbinom(n, 1, .2),
Y0 = rnorm(n, 0, 1),
Y1 = rnorm(n, 2 * A, 1),
Y = (A*Y1) + (1 - A)*Y0)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ipw_estFUN <- function(data){
A <- data$A
Y <- data$Y
function(theta, phat){
ipw0 <- 1/theta[1]
ipw1 <- 1/theta[2]
# Estimating functions #
c( (1 - A) - theta[1],
A - theta[2],
# Estimating IP weight
Y*(1 - A)*ipw0 - theta[3],
Y*(A)*ipw1 - theta[4],
# Treating IP weight as known
Y*A/phat - theta[5]
)
}
}
phat <- mean(x$A)
out <- m_estimate(ipw_estFUN,
data = x,
inner_args = list(phat = phat),
root_control = setup_root_control(start = c(.5, .5, 0, 0, 0)))
## [1] TRUE
## [1] TRUE
## Comparing variance estimates
geex_vcov <- diag(vcov(out)) * n
# estimates match treating propensity as known
all.equal(var(x$Y * x$A/phat) * (n - 1)/n, geex_vcov[5])
## [1] TRUE
# estimates match using influence function approach
y <- x$Y * x$A/phat - mean(x$Y * x$A/phat)
z <- (x$A - phat) / (phat*(1 - phat))
var(y - predict(lm(y ~ z))) - geex_vcov[4] # close
## [1] 0.004456374
An example \(\psi\) function written
in R
. This function computes the score functions for a GLM,
plus two counterfactual means estimated by inverse probability
weighting.
eefun <- function(data, model, alpha){
X <- model.matrix(model, data = data)
A <- model.response(model.frame(model, data = data))
Y <- data$Y
function(theta){
p <- length(theta)
p1 <- length(coef(model))
lp <- X %*% theta[1:p1]
rho <- plogis(lp)
hh <- ((rho/alpha)^A * ((1-rho)/(1-alpha))^(1 - A))
IPW <- 1/(exp(sum(log(hh))))
score_eqns <- apply(X, 2, function(x) sum((A - rho) * x))
ce0 <- mean(Y * (A == 0)) * IPW / (1 - alpha)
ce1 <- mean(Y * (A == 1)) * IPW / (alpha)
c(score_eqns,
ce0 - theta[p - 1],
ce1 - theta[p])
}
}
Compare to what inferference
gets.
test <- interference(
formula = Y | A ~ X1 | group,
data = vaccinesim,
model_method = 'glm',
allocations = c(.35, .4))
mglm <- glm(A ~ X1, data = vaccinesim, family = binomial)
ce_estimates <- m_estimate(
estFUN = eefun,
data = vaccinesim,
units = 'group',
root_control = setup_root_control(start = c(coef(mglm), .4, .13)),
outer_args = list(alpha = .35, model = mglm)
)
roots(ce_estimates)
## (Intercept) X1
## -0.36869683 -0.02037916 0.42186669 0.15507946
# Compare parameter estimates
direct_effect(test, allocation = .35)$estimate
## [1] 0.2667872
##
## 0.2667872
# conpare SE estimates
L <- c(0, 0, 1, -1)
Sigma <- vcov(ce_estimates)
sqrt(t(L) %*% Sigma %*% L) # from GEEX
## [,1]
## [1,] 0.03716025
direct_effect(test, allocation = .35)$std.error # from inferference
## [1] 0.02602316
I would expect them to be somewhat different, since
inferference
uses a slightly different variance estimator
defined in the web
appendix of Perez-Heydrich et al (2014).
Estimators of causal effects often have the form:
\[\begin{equation} \label{eq:causal} \sum_{i = 1}^m \psi(O_i, \theta) = \sum_{i = 1}^m \begin{pmatrix} \psi(O_i, \nu) \\ \psi(O_i, \beta) \end{pmatrix} = 0, \end{equation}\]
where \(\nu\) are parameters in
nuisance model(s), such as a propensity score model, and \(\beta\) are the target causal parameters.
Even when \(\nu\) represent parameters
in common statistical models, deriving a closed form for a sandwich
variance estimator for \(\beta\) based
on Equation~\(\ref{eq:causal}\) may
involve tedious and error-prone derivative and matrix calculations
[e.g.; see the appendices of Lunceford and
Davidian (2004) and Perez-Heydrich et al.
(2014)]. In this example, we show how an analyst can avoid these
calculations and compute the empirical sandwich variance estimator using
geex
.
Lunceford and Davidian (2004) review several estimators of causal effects from observational data. To demonstrate a more complicated estimator involving multiple nuisance models, we implement the doubly robust estimator:
\[\begin{equation} \label{eq:dbr} \hat{\Delta}_{DR} = \sum_{i = 1}^m \frac{Z_iY_i - (Z_i - \hat{e}_i) m_1(X_i, \hat{\alpha}_1)}{\hat{e}_i} - \frac{(1 - Z_i)Y_i - (Z_i - \hat{e}_i) m_0(X_i, \hat{\alpha}_0)}{1 - \hat{e}_i}. \end{equation}\]
This estimator targets the average causal effect, \(\Delta = \E[Y(1) - Y(0)]\), where \(Y(z)\) is the potential outcome for an experimental unit had it been exposed to the level \(z\) of the binary exposure variable \(Z\). The estimated propsensity score, \(\hat{e}_i\), is the estimated probability that unit \(i\) received \(z = 1\) and \(m_z(X_i, \hat{\alpha}_z)\) are regression models for the outcome with baseline covariates \(X_i\) and estimated paramaters \(\hat{\alpha}_z\). This estimator has the property that if either the propensity score models or the outcome models are correctly specified, then the solution to Equation~\(\ref{eq:dbr}\) will be a consistent and asymptotically Normal estimator of \(\Delta\).
This estimator and its estimating equations can be translated into an
estFUN
as:
dr_estFUN <- function(data, models){
Z <- data$Z
Y <- data$Y
Xe <- grab_design_matrix(
data, rhs_formula = grab_fixed_formula(models$e))
Xm0 <- grab_design_matrix(
data, rhs_formula = grab_fixed_formula(models$m0))
Xm1 <- grab_design_matrix(
data, rhs_formula = grab_fixed_formula(models$m1))
e_pos <- 1:ncol(Xe)
m0_pos <- (max(e_pos) + 1):(max(e_pos) + ncol(Xm0))
m1_pos <- (max(m0_pos) + 1):(max(m0_pos) + ncol(Xm1))
e_scores <- grab_psiFUN(models$e, data)
m0_scores <- grab_psiFUN(models$m0, data)
m1_scores <- grab_psiFUN(models$m1, data)
function(theta){
e <- plogis(Xe %*% theta[e_pos])
m0 <- Xm0 %*% theta[m0_pos]
m1 <- Xm1 %*% theta[m1_pos]
rd_hat <- (Z*Y - (Z - e) * m1)/e - ((1 - Z) * Y - (Z - e) * m0)/(1 - e)
c(e_scores(theta[e_pos]),
m0_scores(theta[m0_pos]) * (Z == 0),
m1_scores(theta[m1_pos]) * (Z == 1),
rd_hat - theta[length(theta)])
}
}
This estFUN
presumes that the user will pass a list
containing fitted model objects for the three nuisance models: the
propensity score model and one regression model for each treatment
group. The functions grab_design_matrix
and
grab_fixed_formula
are geex
utilities for
extracting relevant pieces of a model object. The function
grab_psiFUN
converts a fitted model object to an estimating
function; for example, for a glm
object,
grab_psiFUN
uses the to create a function
of
theta
corresponding to the generalized linear model score
function. The m_estimate
function can be wrapped in another
function, wherein nuisance models are fit and passed to
m_estimate
.
estimate_dr <- function(data, propensity_formula, outcome_formula){
e_model <- glm(propensity_formula, data = data, family = binomial)
m0_model <- glm(outcome_formula, subset = (Z == 0), data = data)
m1_model <- glm(outcome_formula, subset = (Z == 1), data = data)
models <- list(e = e_model, m0 = m0_model, m1 = m1_model)
nparms <- sum(unlist(lapply(models, function(x) length(coef(x))))) + 1
m_estimate(
estFUN = dr_estFUN,
data = data,
root_control = setup_root_control(start = rep(0, nparms)),
outer_args = list(models = models))
}
The following code provides a function to replicate the simulation settings of Lunceford and Davidian (2004).
library(mvtnorm)
tau_0 <- c(-1, -1, 1, 1)
tau_1 <- tau_0 * -1
Sigma_X3 <- matrix(
c(1, 0.5, -0.5, -0.5,
0.5, 1, -0.5, -0.5,
-0.5, -0.5, 1, 0.5,
-0.5, -0.5, 0.5, 1), ncol = 4, byrow = TRUE)
gen_data <- function(n, beta, nu, xi){
X3 <- rbinom(n, 1, prob = 0.2)
V3 <- rbinom(n, 1, prob = (0.75 * X3 + (0.25 * (1 - X3))))
hold <- rmvnorm(n, mean = rep(0, 4), Sigma_X3)
colnames(hold) <- c("X1", "V1", "X2", "V2")
hold <- cbind(hold, X3, V3)
hold <- apply(hold, 1, function(x){
x[1:4] <- x[1:4] + tau_1^(x[5])*tau_0^(1 - x[5])
x})
hold <- t(hold)[, c("X1", "X2", "X3", "V1", "V2", "V3")]
X <- cbind(Int = 1, hold)
Z <- rbinom(n, 1, prob = plogis(X[, 1:4] %*% beta))
X <- cbind(X[, 1:4], Z, X[, 5:7])
data.frame(
Y = X %*% c(nu, xi) + rnorm(n),
X[ , -1])
}
To show that estimate_dr
correctly computes \(\hat{\Delta}_{DR}\), the results from
geex
can be compared to computing \(\hat{\Delta}_{DR}\) “by hand” for a
simulated dataset.
dt <- gen_data(1000, c(0, 0.6, -0.6, 0.6), c(0, -1, 1, -1, 2), c(-1, 1, 1))
geex_results <- estimate_dr(dt, Z ~ X1 + X2 + X3, Y ~ X1 + X2 + X3)
e <- predict(glm(Z ~ X1 + X2 + X3, data = dt, family = "binomial"),
type = "response")
m0 <- predict(glm(Y ~ X1 + X2 + X3, data = dt, subset = Z==0), newdata = dt)
m1 <- predict(glm(Y ~ X1 + X2 + X3, data = dt, subset = Z==1), newdata = dt)
del_hat <- with(dt, mean( (Z * Y - (Z - e) * m1)/e)) -
with(dt, mean(((1 - Z) * Y - (Z - e) * m0)/(1 - e)))