Do exact matching variables need to be included when estimating effects after matching with the MatchIt package? Take the following example (adapted from the MatchIt vignette on estimating effects after matching):
library(MatchIt)
library(marginaleffects)
# Generate toy dataset
# Draw an n-by-9 covariate matrix of independent standard normal values,
# then replace column 5 with a 0/1 indicator of whether its draw was
# below 0.5.
gen_X <- function(n) {
  n_covariates <- 9
  covariates <- matrix(rnorm(n_covariates * n), nrow = n, ncol = n_covariates)
  below_cutoff <- covariates[, 5] < 0.5
  covariates[, 5] <- as.numeric(below_cutoff)
  covariates
}
# Simulate a binary (0/1) variable A from the covariate matrix X.
#
# X must have at least 8 columns; columns 1, 2, 4, 5, 7 and 8 enter a
# logistic model whose coefficients are written as log odds ratios.
# Returns a vector with one Bernoulli draw per row of X.
gen_A <- function(X) {
  # Linear predictor on the log-odds scale. The explicit `*` operators are
  # required: `log(2)X[, 1]` is not valid R.
  LP_A <- -1.2 + log(2) * X[, 1] - log(1.5) * X[, 2] + log(2) * X[, 4] -
    log(2.4) * X[, 5] + log(2) * X[, 7] - log(1.5) * X[, 8]

  # Inverse logit turns the log-odds into probabilities
  P_A <- plogis(LP_A)

  rbinom(nrow(X), 1, P_A)
}
# Simulate a binary (0/1) outcome from A and the covariate matrix X.
#
# A is a numeric vector with one entry per row of X; X must have at least
# 6 columns. A and columns 1-6 of X enter a logistic model whose
# coefficients are written as log odds ratios.
# Returns a vector of Bernoulli draws with the same length as A.
gen_Y_B <- function(A, X) {
  # Linear predictor on the log-odds scale. The explicit `*` operators are
  # required: `log(2.4)A` is not valid R.
  LP_B <- -2 + log(2.4) * A + log(2) * X[, 1] + log(2) * X[, 2] +
    log(2) * X[, 3] + log(1.5) * X[, 4] + log(2.4) * X[, 5] +
    log(1.5) * X[, 6]

  # Inverse logit turns the log-odds into probabilities
  P_B <- plogis(LP_B)

  rbinom(length(A), 1, P_B)
}
# Simulate the example data; the fixed seed makes the draws reproducible
set.seed(123)
n <- 2000
X <- gen_X(n)
A <- gen_A(X)
Y_B <- gen_Y_B(A, X)

# Ten equally sized strata, assigned to units in random order
stratum <- sample(rep(seq_len(10), each = n / 10))

# X is an unnamed matrix, so its columns enter the data frame as X1, ..., X9
d <- data.frame(A, X, Y_B, stratum)

# Keep only the assembled data frame in the workspace
rm(list = setdiff(ls(), "d"))
# Create matchit object
# Match on a logistic-regression ("glm" + logit link) distance computed from
# X1-X9, using method = "quick" and requiring exact agreement on `stratum`.
mF <- matchit(
  A ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9,
  data = d,
  method = "quick",
  distance = "glm",
  link = "logit",
  exact = ~ stratum
)
# Extract matched data
# The outcome model and effect estimation below rely on the `weights` and
# `subclass` columns of the matched data set.
md <- match.data(object = mF)
Is the following estimation of the regression model and computation of effects correct? If so, why doesn't the stratum variable need to be included?
# Logistic regression model with covariates: A is interacted with every
# covariate, and the matching weights are applied. quasibinomial() is used
# rather than binomial() -- presumably to avoid warnings about non-integer
# weighted counts.
fit <- glm(
  Y_B ~ A * (X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9),
  data = md,
  weights = weights,
  family = quasibinomial()
)
# Compute effects; RR and confidence interval.
# Predictions are averaged over the A == 1 rows of the matched data using the
# matching weights; the log ratio of averages is exponentiated to give a ratio,
# and the variance is clustered on `subclass`.
avg_comparisons(
  fit,
  variables = "A",
  vcov = ~ subclass,
  newdata = subset(md, A == 1),
  wts = "weights",
  comparison = "lnratioavg",
  transform = "exp"
)
```