Extract model fit results to dataframe (generic): finalfit model extractors

Takes output from finalfit model wrappers and extracts to a dataframe, convenient for further processing in preparation for final results table.

fit2df.lm is the model extract method for lm.

fit2df.lmlist is the model extract method for lmuni and lmmulti.

fit2df.glm is the model extract method for standard glm models, which have not used finalfit model wrappers.

fit2df.glmboot is the model extract method for glmmulti_boot models.

fit2df.glmlist is the model extract method for glmuni and glmmulti.

fit2df.svyglmlist is the model extract method for svyglmuni and svyglmmulti.

fit2df.lmerMod is the model extract method for standard lme4::lmer models and for the finalfit::lmmixed model wrapper.

fit2df.glmerMod is the model extract method for standard lme4::glmer models and for the finalfit::glmmixed model wrapper.

fit2df.coxph is the model extract method for survival::coxph.

fit2df.coxphlist is the model extract method for coxphuni and coxphmulti.

fit2df.crr is the model extract method for cmprsk::crr.

fit2df.coxme is the model extract method for coxme::coxme.

fit2df.crrlist is the model extract method for crruni and crrmulti.

fit2df.stanfit is the model extract method for our standard Bayesian hierarchical binomial logistic regression models. These models will be fully documented separately. However, this should work for a single or multilevel Bayesian logistic regression done in Stan, as long as the fixed effects are specified in the parameters block as a vector named beta, of length P, where P is the number of fixed effect parameters. e.g. parameters { vector[P] beta; }

fit2df.mipo is the model extract method for the mipo object created using mice::pool.

fit2df(...)

# S3 method for lm
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "Coefficient",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_level = 0.95,
  confint_sep = " to ",
  ...
)

# S3 method for lmlist
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "Coefficient",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_level = 0.95,
  confint_sep = " to ",
  ...
)

# S3 method for glm
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "OR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  exp = TRUE,
  confint_type = "profile",
  confint_level = 0.95,
  confint_sep = "-",
  ...
)

# S3 method for glmboot
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "OR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  exp = TRUE,
  confint_level = 0.95,
  confint_sep = "-",
  ...
)

# S3 method for glmlist
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "OR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  exp = TRUE,
  confint_type = "profile",
  confint_level = 0.95,
  confint_sep = "-",
  ...
)

# S3 method for svyglmlist
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "Coefficient",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  exp = FALSE,
  confint_type = "profile",
  confint_level = 0.95,
  confint_sep = "-",
  ...
)

# S3 method for lmerMod
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "Coefficient",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_type = "Wald",
  confint_level = 0.95,
  confint_sep = " to ",
  ...
)

# S3 method for glmerMod
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "OR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  exp = TRUE,
  confint_type = "Wald",
  confint_level = 0.95,
  confint_sep = "-",
  ...
)

# S3 method for coxph
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  explanatory_name = "explanatory",
  estimate_name = "HR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_sep = "-",
  ...
)

# S3 method for coxphlist
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  explanatory_name = "explanatory",
  estimate_name = "HR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_sep = "-",
  ...
)

# S3 method for crr
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  explanatory_name = "explanatory",
  estimate_name = "HR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_sep = "-",
  ...
)

# S3 method for coxme
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  explanatory_name = "explanatory",
  estimate_name = "HR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_sep = "-",
  ...
)

# S3 method for crrlist
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  explanatory_name = "explanatory",
  estimate_name = "HR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_sep = "-",
  ...
)

# S3 method for stanfit
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "OR",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  confint_sep = "-",
  ...
)

# S3 method for mipo
fit2df(
  .data,
  condense = TRUE,
  metrics = FALSE,
  remove_intercept = TRUE,
  explanatory_name = "explanatory",
  estimate_name = "Coefficient",
  estimate_suffix = "",
  p_name = "p",
  digits = c(2, 2, 3),
  exp = FALSE,
  confint_level = 0.95,
  confint_sep = "-",
  ...
)

Arguments

...: Other arguments: X: Design matrix from stanfit modelling. Details documented elsewhere.
.data: Output from finalfit model wrappers.
condense: Logical: when true, effect estimates, confidence intervals and p-values are pasted conveniently together in a single cell.
metrics: Logical: when true, useful model metrics are extracted.
remove_intercept: Logical: remove the results for the intercept term.
explanatory_name: Name for this column in output
estimate_name: Name for this column in output
estimate_suffix: Appended to estimate name
p_name: Name given to p-value estimate
digits: Number of digits to round to (1) estimate, (2) confidence interval limits, (3) p-value.
confint_level: The confidence level required.
confint_sep: String to separate confidence intervals, typically "-" or " to ".
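exp: Currently GLM-type methods only (glm, glmboot, glmlist, svyglmlist, glmerMod and mipo). Exponentiate coefficients and confidence intervals. Defaults to TRUE, except for the svyglmlist and mipo methods, where it defaults to FALSE.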
confint_type: One of c("profile", "default") for GLM models (confint.glm) or c("profile", "Wald", "boot") for glmer/lmer models (confint.merMod). Not implemented for lm, coxph or coxphlist.

Value

A dataframe of model parameters. When metrics=TRUE, output is a list of length two containing two dataframes: one of model parameters and one of model metrics.

Details

fit2df is a generic (S3) function for model extract.

Examples

library(finalfit)
library(dplyr)
library(survival)
# glm
fit = glm(mort_5yr ~  age.factor + sex.factor + obstruct.factor + perfor.factor,
  data=colon_s, family="binomial")
fit %>%
  fit2df(estimate_suffix=" (multivariable)")
#> Waiting for profiling to be done...
#>             explanatory        OR (multivariable)
#> 1 age.factor40-59 years 0.57 (0.34-0.98, p=0.041)
#> 2   age.factor60+ years 0.81 (0.48-1.36, p=0.426)
#> 3        sex.factorMale 0.98 (0.75-1.28, p=0.902)
#> 4    obstruct.factorYes 1.25 (0.90-1.76, p=0.186)
#> 5      perfor.factorYes 1.12 (0.51-2.44, p=0.770)

# glmlist
explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
dependent = "mort_5yr"
colon_s %>%
  glmmulti(dependent, explanatory) %>%
  fit2df(estimate_suffix=" (univariable)")
#> Waiting for profiling to be done...
#>             explanatory          OR (univariable)
#> 1 age.factor40-59 years 0.57 (0.34-0.98, p=0.041)
#> 2   age.factor60+ years 0.81 (0.48-1.36, p=0.426)
#> 3        sex.factorMale 0.98 (0.75-1.28, p=0.902)
#> 4    obstruct.factorYes 1.25 (0.90-1.76, p=0.186)
#> 5      perfor.factorYes 1.12 (0.51-2.44, p=0.770)

# glmerMod
explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
random_effect = "hospital"
dependent = "mort_5yr"
colon_s %>%
  glmmixed(dependent, explanatory, random_effect) %>%
  fit2df(estimate_suffix=" (multilevel)")
#>             explanatory           OR (multilevel)
#> 1 age.factor40-59 years 0.75 (0.39-1.44, p=0.382)
#> 2   age.factor60+ years 1.03 (0.55-1.96, p=0.916)
#> 3        sex.factorMale 0.80 (0.58-1.11, p=0.180)
#> 4    obstruct.factorYes 1.23 (0.82-1.83, p=0.320)
#> 5      perfor.factorYes 1.03 (0.43-2.51, p=0.940)

# glmboot
## Note number of draws set to 100 just for speed in this example
explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
dependent = "mort_5yr"
colon_s %>%
  glmmulti_boot(dependent, explanatory,  R = 100) %>%
  fit2df(estimate_suffix=" (multivariable (BS CIs))")
#>             explanatory OR (multivariable (BS CIs))
#> 1 age.factor40-59 years   0.57 (0.26-0.89, p=0.020)
#> 2   age.factor60+ years   0.81 (0.41-1.34, p=0.380)
#> 3        sex.factorMale   0.98 (0.76-1.30, p=0.800)
#> 4    obstruct.factorYes   1.25 (0.83-1.72, p=0.200)
#> 5      perfor.factorYes   1.12 (0.40-2.45, p=0.760)

# lm
fit = lm(nodes ~  age.factor + sex.factor + obstruct.factor + perfor.factor,
  data=colon_s)
fit %>%
  fit2df(estimate_suffix=" (multivariable)")
#>             explanatory     Coefficient (multivariable)
#> 1 age.factor40-59 years -1.21 (-2.16 to -0.26, p=0.012)
#> 2   age.factor60+ years -1.25 (-2.18 to -0.33, p=0.008)
#> 3        sex.factorMale  -0.07 (-0.54 to 0.40, p=0.779)
#> 4    obstruct.factorYes  -0.31 (-0.91 to 0.29, p=0.313)
#> 5      perfor.factorYes   0.28 (-1.09 to 1.66, p=0.686)

# lmerMod
explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
random_effect = "hospital"
dependent = "nodes"

colon_s %>%
  lmmixed(dependent, explanatory, random_effect) %>%
  fit2df(estimate_suffix=" (multilevel")
#> P-value for lmer is estimate assuming t-distribution is normal. Bootstrap for final publication.
#>             explanatory         Coefficient (multilevel
#> 1 age.factor40-59 years  -0.79 (-1.65 to 0.07, p=0.035)
#> 2   age.factor60+ years -0.98 (-1.81 to -0.14, p=0.011)
#> 3        sex.factorMale  -0.19 (-0.62 to 0.24, p=0.195)
#> 4    obstruct.factorYes  -0.37 (-0.92 to 0.17, p=0.091)
#> 5      perfor.factorYes   0.23 (-1.01 to 1.48, p=0.357)

# coxphlist
explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
dependent = "Surv(time, status)"

colon_s %>%
  coxphuni(dependent, explanatory) %>%
  fit2df(estimate_suffix=" (univariable)")
#>             explanatory          HR (univariable)
#> 1 age.factor40-59 years 0.76 (0.53-1.09, p=0.132)
#> 2   age.factor60+ years 0.93 (0.66-1.31, p=0.668)
#> 3        sex.factorMale 1.01 (0.84-1.22, p=0.888)
#> 4    obstruct.factorYes 1.29 (1.03-1.62, p=0.028)
#> 5      perfor.factorYes 1.17 (0.70-1.95, p=0.556)

colon_s %>%
  coxphmulti(dependent, explanatory) %>%
  fit2df(estimate_suffix=" (multivariable)")
#>             explanatory        HR (multivariable)
#> 1 age.factor40-59 years 0.79 (0.55-1.13, p=0.196)
#> 2   age.factor60+ years 0.98 (0.69-1.40, p=0.926)
#> 3        sex.factorMale 1.02 (0.85-1.23, p=0.812)
#> 4    obstruct.factorYes 1.30 (1.03-1.64, p=0.026)
#> 5      perfor.factorYes 1.08 (0.64-1.81, p=0.785)

# coxph
fit = coxph(Surv(time, status) ~ age.factor + sex.factor + obstruct.factor + perfor.factor,
  data = colon_s)

fit %>%
  fit2df(estimate_suffix=" (multivariable)")
#>             explanatory        HR (multivariable)
#> 1 age.factor40-59 years 0.79 (0.55-1.13, p=0.196)
#> 2   age.factor60+ years 0.98 (0.69-1.40, p=0.926)
#> 3        sex.factorMale 1.02 (0.85-1.23, p=0.812)
#> 4    obstruct.factorYes 1.30 (1.03-1.64, p=0.026)
#> 5      perfor.factorYes 1.08 (0.64-1.81, p=0.785)
  
# crr: competing risks
melanoma = boot::melanoma
melanoma = melanoma %>% 
  mutate(
    status_crr = ifelse(status == 2, 0, # "still alive"
      ifelse(status == 1, 1, # "died of melanoma"
      2)), # "died of other causes" 
    sex = factor(sex),
    ulcer = factor(ulcer)
  )

dependent = c("Surv(time, status_crr)")
explanatory = c("sex", "age", "ulcer")
melanoma %>% 
  summary_factorlist(dependent, explanatory, column = TRUE, fit_id = TRUE) %>% 
  ff_merge(
    melanoma %>% 
      crrmulti(dependent, explanatory) %>% 
      fit2df(estimate_suffix = " (competing risks)")
  ) %>% 
select(-fit_id, -index) %>% 
dependent_label(melanoma, dependent)
#> Dependent variable is a survival object
#>   Dependent: Surv(time, status_crr)                   all
#> 2                               sex         0  126 (61.5)
#> 3                                           1   79 (38.5)
#> 1                               age Mean (SD) 52.5 (16.7)
#> 4                             ulcer         0  115 (56.1)
#> 5                                           1   90 (43.9)
#>        HR (competing risks)
#> 2                         -
#> 3 1.61 (0.94-2.75, p=0.084)
#> 1 1.01 (0.99-1.03, p=0.370)
#> 4                         -
#> 5 3.81 (2.16-6.72, p<0.001)