Create predictorMatrix for use with mice

missing_predictorMatrix(
  .data,
  drop_from_imputed = NULL,
  drop_from_imputer = NULL
)

Arguments

.data

Data frame.

drop_from_imputed

Quoted names of variables not to impute.

drop_from_imputer

Quoted names of variables not to use in the imputation algorithm.

Value

Matrix formatted for the predictorMatrix argument of mice().

Examples

library(mice)
#> 
#> Attaching package: ‘mice’
#> The following object is masked from ‘package:stats’:
#> 
#>     filter
#> The following objects are masked from ‘package:base’:
#> 
#>     cbind, rbind
library(dplyr)

# Simulate two smoking variables with different missingness mechanisms
## MCAR: every patient has the same 10% chance of a missing value
set.seed(1)
colon_s$smoking_mcar = ff_label(
  factor(
    sample(c("Smoker", "Non-smoker", NA),
           nrow(colon_s),
           replace = TRUE,
           prob = c(0.2, 0.7, 0.1))
  ),
  "Smoking (MCAR)"
)

## MAR: the chance of a missing value depends on patient sex
# Females: 40% missing
colon_s$smoking_mar[colon_s$sex.factor == "Female"] = sample(
  c("Smoker", "Non-smoker", NA),
  sum(colon_s$sex.factor == "Female"),
  replace = TRUE,
  prob = c(0.1, 0.5, 0.4)
)

# Males: 10% missing
colon_s$smoking_mar[colon_s$sex.factor == "Male"] = sample(
  c("Smoker", "Non-smoker", NA),
  sum(colon_s$sex.factor == "Male"),
  replace = TRUE,
  prob = c(0.15, 0.75, 0.1)
)

# Convert the character column to a labelled factor
colon_s$smoking_mar = ff_label(factor(colon_s$smoking_mar), "Smoking (MAR)")

# Model variables: `dependent` is the outcome, `explanatory` the predictors
explanatory = c("age", "sex.factor",
  "nodes", "obstruct.factor", "smoking_mar")
dependent = "mort_5yr"

# Build the predictor matrix, asking for obstruct.factor and mort_5yr
# not to be imputed.
# all_of() is needed because `dependent` and `explanatory` are character
# vectors defined outside the data frame; passing them bare to select()
# has been deprecated since tidyselect 1.1.0 and raises a warning.
colon_s %>%
  select(all_of(dependent), all_of(explanatory)) %>%
  missing_predictorMatrix(drop_from_imputed =
    c("obstruct.factor", "mort_5yr")) -> predM

colon_s %>%
  select(all_of(dependent), all_of(explanatory)) %>%
  # m = 2 imputed data sets here; use more (e.g. m = 10) in a real analysis
  mice(m = 2, predictorMatrix = predM) %>%
  # Run logistic regression on each imputed set
  with(glm(formula(ff_formula(dependent, explanatory)),
           family="binomial")) %>%
  # Combine the per-imputation model fits
  pool() %>%
  summary(conf.int = TRUE, exponentiate = TRUE) %>%
  # Jiggle into finalfit format
  mutate(explanatory_name = rownames(.)) %>%
  select(explanatory_name, estimate, `2.5 %`, `97.5 %`, p.value) %>%
  condense_fit(estimate_suffix = " (multiple imputation)") %>%
  remove_intercept() -> fit_imputed
#> 
#>  iter imp variable
#>   1   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   1   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   2   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   2   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   3   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   3   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   4   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   4   2  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   5   1  mort_5yr  nodes  obstruct.factor  smoking_mar
#>   5   2  mort_5yr  nodes  obstruct.factor  smoking_mar