Create predictorMatrix for use with mice

missing_predictorMatrix(.data, drop_from_imputed = NULL,
  drop_from_imputer = NULL)

Arguments

.data

Data frame.

drop_from_imputed

Quoted names of variables not to impute.

drop_from_imputer

Quoted names of variables not to use as predictors in the imputation algorithm.

Value

A matrix formatted for the predictorMatrix argument of mice.

Examples

library(mice)
#> Loading required package: lattice
#>
#> Attaching package: ‘mice’
#> The following objects are masked from ‘package:base’:
#>
#>     cbind, rbind
# Each library() call must be its own statement; two calls on one line
# without a separator do not parse.
library(dplyr)
library(Hmisc)
#> Loading required package: Formula
#>
#> Attaching package: ‘Hmisc’
#> The following objects are masked from ‘package:dplyr’:
#>
#>     src, summarize
#> The following objects are masked from ‘package:base’:
#>
#>     format.pval, units
# Example: build a predictorMatrix with missing_predictorMatrix(), pass it to
# mice(), then pool logistic regression fits across the imputed data sets.

# Create some extra missing data
## Smoking missing completely at random
set.seed(1)
colon_s$smoking_mcar <-
  sample(c("Smoker", "Non-smoker", NA),
    nrow(colon_s),
    replace = TRUE,
    prob = c(0.2, 0.7, 0.1)
  ) %>%
  factor()
Hmisc::label(colon_s$smoking_mcar) <- "Smoking (MCAR)"

## Make smoking missing conditional on patient sex
colon_s$smoking_mar[colon_s$sex.factor == "Female"] <-
  sample(c("Smoker", "Non-smoker", NA),
    sum(colon_s$sex.factor == "Female"),
    replace = TRUE,
    prob = c(0.1, 0.5, 0.4)
  )

colon_s$smoking_mar[colon_s$sex.factor == "Male"] <-
  sample(c("Smoker", "Non-smoker", NA),
    sum(colon_s$sex.factor == "Male"),
    replace = TRUE,
    prob = c(0.15, 0.75, 0.1)
  )
colon_s$smoking_mar <- factor(colon_s$smoking_mar)
Hmisc::label(colon_s$smoking_mar) <- "Smoking (MAR)"

explanatory <- c(
  "age", "sex.factor", "nodes", "obstruct.factor", "smoking_mar"
)
dependent <- "mort_5yr"

# Build the predictor matrix. all_of() makes it explicit that `dependent` and
# `explanatory` are external character vectors of column names rather than
# columns of colon_s.
predM <- colon_s %>%
  select(all_of(c(dependent, explanatory))) %>%
  missing_predictorMatrix(
    drop_from_imputed = c("obstruct.factor", "mort_5yr")
  )

fit_imputed <- colon_s %>%
  select(all_of(c(dependent, explanatory))) %>%
  mice(m = 2, predictorMatrix = predM) %>% # e.g. m=10 when for real
  # Run logistic regression on each imputed set
  with(glm(formula(ff_formula(dependent, explanatory)),
    family = "binomial"
  )) %>%
  pool() %>%
  summary(conf.int = TRUE, exponentiate = TRUE) %>%
  # Jiggle into finalfit format
  mutate(explanatory_name = rownames(.)) %>%
  select(explanatory_name, estimate, `2.5 %`, `97.5 %`, p.value) %>%
  condense_fit(estimate_suffix = " (multiple imputation)") %>%
  remove_intercept()
#>
#>  iter imp variable
#>   1   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   1   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   2   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   2   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   3   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   3   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   4   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   4   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   5   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   5   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#> Warning: Vectorizing 'labelled' elements may not preserve their attributes
#> Warning: Vectorizing 'labelled' elements may not preserve their attributes
#> Warning: Exponentiating coefficients, but model did not use a log or logit link function