This function was written a few days after the retraction of a paper in JAMA due to an error in recoding the treatment variable (https://jamanetwork.com/journals/jama/fullarticle/2752474). It takes a data frame or tibble, fuzzy-matches variable names, and produces crosstables of all matched variables. A visual inspection of these crosstables should reveal any miscoding.

check_recode(.data, dependent = NULL, explanatory = NULL,
  include_numerics = TRUE, ...)

Arguments

.data

Data frame or tibble.

dependent

Optional character vector: name(s) of dependent variable(s).

explanatory

Optional character vector: name(s) of explanatory variable(s).

include_numerics

Logical. Whether to include numeric variables when matching variable names and producing crosstables.

...

Pass other arguments to agrep.

Value

List of length two. The first element is an index of variable combinations. The second is a nested list of crosstables as tibbles.

Examples

library(dplyr) data(colon_s) colon_s_small = colon_s %>% select(-id, -rx, -rx.factor) %>% mutate( age.factor2 = forcats::fct_collapse(age.factor, "<60 years" = c("<40 years", "40-59 years")), sex.factor2 = forcats::fct_recode(sex.factor, # Intentional miscode "F" = "Male", "M" = "Female") ) # Check colon_s_small %>% check_recode(include_numerics = FALSE)
#> $index #> # A tibble: 3 x 2 #> var1 var2 #> <chr> <chr> #> 1 sex.factor sex.factor2 #> 2 age.factor age.factor2 #> 3 sex.factor2 age.factor2 #> #> $counts #> $counts[[1]] #> sex.factor sex.factor2 n #> 1 Female M 445 #> 2 Male F 484 #> #> $counts[[2]] #> age.factor age.factor2 n #> 1 <40 years <60 years 70 #> 2 40-59 years <60 years 344 #> 3 60+ years 60+ years 515 #> #> $counts[[3]] #> sex.factor2 age.factor2 n #> 1 M <60 years 204 #> 2 M 60+ years 241 #> 3 F <60 years 210 #> 4 F 60+ years 274 #> #>
out = colon_s_small %>% select(-extent, -extent.factor,-time, -time.years) %>% check_recode() out
#> $index #> # A tibble: 19 x 2 #> var1 var2 #> <chr> <chr> #> 1 sex sex.factor #> 2 sex sex.factor2 #> 3 age age.factor #> 4 age age.10 #> 5 age age.factor2 #> 6 obstruct obstruct.factor #> 7 perfor perfor.factor #> 8 adhere adhere.factor #> 9 nodes node4 #> 10 nodes node4.factor #> 11 status status.factor #> 12 differ differ.factor #> 13 surg surg.factor #> 14 node4 node4.factor #> 15 sex.factor sex.factor2 #> 16 age.factor age.factor2 #> 17 loccomp loccomp.factor #> 18 mort_5yr mort_5yr.num #> 19 sex.factor2 age.factor2 #> #> $counts #> $counts[[1]] #> sex sex.factor n #> 1 0 Female 445 #> 2 1 Male 484 #> #> $counts[[2]] #> sex sex.factor2 n #> 1 0 M 445 #> 2 1 F 484 #> #> $counts[[3]] #> age age.factor n #> 1 18 <40 years 1 #> 2 22 <40 years 1 #> 3 25 <40 years 1 #> 4 26 <40 years 1 #> 5 27 <40 years 3 #> 6 28 <40 years 1 #> 7 29 <40 years 1 #> 8 30 <40 years 5 #> 9 31 <40 years 2 #> 10 32 <40 years 5 #> 11 33 <40 years 7 #> 12 34 <40 years 4 #> 13 35 <40 years 2 #> 14 36 <40 years 10 #> 15 37 <40 years 2 #> 16 38 <40 years 10 #> 17 39 <40 years 14 #> 18 40 40-59 years 8 #> 19 41 40-59 years 7 #> 20 42 40-59 years 7 #> 21 43 40-59 years 11 #> 22 44 40-59 years 8 #> 23 45 40-59 years 13 #> 24 46 40-59 years 19 #> 25 47 40-59 years 12 #> 26 48 40-59 years 15 #> 27 49 40-59 years 13 #> 28 50 40-59 years 14 #> 29 51 40-59 years 10 #> 30 52 40-59 years 20 #> 31 53 40-59 years 22 #> 32 54 40-59 years 16 #> 33 55 40-59 years 27 #> 34 56 40-59 years 31 #> 35 57 40-59 years 31 #> 36 58 40-59 years 29 #> 37 59 40-59 years 31 #> 38 60 60+ years 31 #> 39 61 60+ years 36 #> 40 62 60+ years 21 #> 41 63 60+ years 29 #> 42 64 60+ years 36 #> 43 65 60+ years 28 #> 44 66 60+ years 35 #> 45 67 60+ years 24 #> 46 68 60+ years 38 #> 47 69 60+ years 20 #> 48 70 60+ years 36 #> 49 71 60+ years 24 #> 50 72 60+ years 25 #> 51 73 60+ years 20 #> 52 74 60+ years 34 #> 53 75 60+ years 17 #> 54 76 60+ years 21 #> 55 77 60+ years 11 #> 56 78 60+ years 5 #> 57 79 60+ years 7 #> 58 80 60+ 
years 8 #> 59 81 60+ years 5 #> 60 82 60+ years 2 #> 61 83 60+ years 1 #> 62 85 60+ years 1 #> #> $counts[[4]] #> age age.10 n #> 1 18 1.8 1 #> 2 22 2.2 1 #> 3 25 2.5 1 #> 4 26 2.6 1 #> 5 27 2.7 3 #> 6 28 2.8 1 #> 7 29 2.9 1 #> 8 30 3.0 5 #> 9 31 3.1 2 #> 10 32 3.2 5 #> 11 33 3.3 7 #> 12 34 3.4 4 #> 13 35 3.5 2 #> 14 36 3.6 10 #> 15 37 3.7 2 #> 16 38 3.8 10 #> 17 39 3.9 14 #> 18 40 4.0 8 #> 19 41 4.1 7 #> 20 42 4.2 7 #> 21 43 4.3 11 #> 22 44 4.4 8 #> 23 45 4.5 13 #> 24 46 4.6 19 #> 25 47 4.7 12 #> 26 48 4.8 15 #> 27 49 4.9 13 #> 28 50 5.0 14 #> 29 51 5.1 10 #> 30 52 5.2 20 #> 31 53 5.3 22 #> 32 54 5.4 16 #> 33 55 5.5 27 #> 34 56 5.6 31 #> 35 57 5.7 31 #> 36 58 5.8 29 #> 37 59 5.9 31 #> 38 60 6.0 31 #> 39 61 6.1 36 #> 40 62 6.2 21 #> 41 63 6.3 29 #> 42 64 6.4 36 #> 43 65 6.5 28 #> 44 66 6.6 35 #> 45 67 6.7 24 #> 46 68 6.8 38 #> 47 69 6.9 20 #> 48 70 7.0 36 #> 49 71 7.1 24 #> 50 72 7.2 25 #> 51 73 7.3 20 #> 52 74 7.4 34 #> 53 75 7.5 17 #> 54 76 7.6 21 #> 55 77 7.7 11 #> 56 78 7.8 5 #> 57 79 7.9 7 #> 58 80 8.0 8 #> 59 81 8.1 5 #> 60 82 8.2 2 #> 61 83 8.3 1 #> 62 85 8.5 1 #> #> $counts[[5]] #> age age.factor2 n #> 1 18 <60 years 1 #> 2 22 <60 years 1 #> 3 25 <60 years 1 #> 4 26 <60 years 1 #> 5 27 <60 years 3 #> 6 28 <60 years 1 #> 7 29 <60 years 1 #> 8 30 <60 years 5 #> 9 31 <60 years 2 #> 10 32 <60 years 5 #> 11 33 <60 years 7 #> 12 34 <60 years 4 #> 13 35 <60 years 2 #> 14 36 <60 years 10 #> 15 37 <60 years 2 #> 16 38 <60 years 10 #> 17 39 <60 years 14 #> 18 40 <60 years 8 #> 19 41 <60 years 7 #> 20 42 <60 years 7 #> 21 43 <60 years 11 #> 22 44 <60 years 8 #> 23 45 <60 years 13 #> 24 46 <60 years 19 #> 25 47 <60 years 12 #> 26 48 <60 years 15 #> 27 49 <60 years 13 #> 28 50 <60 years 14 #> 29 51 <60 years 10 #> 30 52 <60 years 20 #> 31 53 <60 years 22 #> 32 54 <60 years 16 #> 33 55 <60 years 27 #> 34 56 <60 years 31 #> 35 57 <60 years 31 #> 36 58 <60 years 29 #> 37 59 <60 years 31 #> 38 60 60+ years 31 #> 39 61 60+ years 36 #> 40 62 60+ years 21 #> 41 63 60+ years 29 
#> 42 64 60+ years 36 #> 43 65 60+ years 28 #> 44 66 60+ years 35 #> 45 67 60+ years 24 #> 46 68 60+ years 38 #> 47 69 60+ years 20 #> 48 70 60+ years 36 #> 49 71 60+ years 24 #> 50 72 60+ years 25 #> 51 73 60+ years 20 #> 52 74 60+ years 34 #> 53 75 60+ years 17 #> 54 76 60+ years 21 #> 55 77 60+ years 11 #> 56 78 60+ years 5 #> 57 79 60+ years 7 #> 58 80 60+ years 8 #> 59 81 60+ years 5 #> 60 82 60+ years 2 #> 61 83 60+ years 1 #> 62 85 60+ years 1 #> #> $counts[[6]] #> obstruct obstruct.factor n #> 1 0 No 732 #> 2 1 Yes 176 #> 3 NA <NA> 21 #> #> $counts[[7]] #> perfor perfor.factor n #> 1 0 No 902 #> 2 1 Yes 27 #> #> $counts[[8]] #> adhere adhere.factor n #> 1 0 No 794 #> 2 1 Yes 135 #> #> $counts[[9]] #> nodes node4 n #> 1 0 0 2 #> 2 1 0 269 #> 3 1 1 5 #> 4 2 0 194 #> 5 3 0 124 #> 6 3 1 1 #> 7 4 0 81 #> 8 4 1 3 #> 9 5 0 1 #> 10 5 1 45 #> 11 6 1 43 #> 12 7 1 38 #> 13 8 0 1 #> 14 8 1 22 #> 15 9 0 1 #> 16 9 1 19 #> 17 10 1 13 #> 18 11 1 10 #> 19 12 1 11 #> 20 13 1 7 #> 21 14 1 4 #> 22 15 1 6 #> 23 16 1 1 #> 24 17 1 2 #> 25 19 1 2 #> 26 20 1 2 #> 27 22 1 1 #> 28 24 1 1 #> 29 27 1 1 #> 30 33 1 1 #> 31 NA 0 1 #> 32 NA 1 17 #> #> $counts[[10]] #> nodes node4.factor n #> 1 0 No 2 #> 2 1 No 269 #> 3 1 Yes 5 #> 4 2 No 194 #> 5 3 No 124 #> 6 3 Yes 1 #> 7 4 No 81 #> 8 4 Yes 3 #> 9 5 No 1 #> 10 5 Yes 45 #> 11 6 Yes 43 #> 12 7 Yes 38 #> 13 8 No 1 #> 14 8 Yes 22 #> 15 9 No 1 #> 16 9 Yes 19 #> 17 10 Yes 13 #> 18 11 Yes 10 #> 19 12 Yes 11 #> 20 13 Yes 7 #> 21 14 Yes 4 #> 22 15 Yes 6 #> 23 16 Yes 1 #> 24 17 Yes 2 #> 25 19 Yes 2 #> 26 20 Yes 2 #> 27 22 Yes 1 #> 28 24 Yes 1 #> 29 27 Yes 1 #> 30 33 Yes 1 #> 31 NA No 1 #> 32 NA Yes 17 #> #> $counts[[11]] #> status status.factor n #> 1 0 Alive 477 #> 2 1 Died 452 #> #> $counts[[12]] #> differ differ.factor n #> 1 1 Well 93 #> 2 2 Moderate 663 #> 3 3 Poor 150 #> 4 NA <NA> 23 #> #> $counts[[13]] #> surg surg.factor n #> 1 0 Short 668 #> 2 1 Long 244 #> 3 NA <NA> 17 #> #> $counts[[14]] #> node4 node4.factor n #> 1 0 No 674 #> 2 1 Yes 
255 #> #> $counts[[15]] #> sex.factor sex.factor2 n #> 1 Female M 445 #> 2 Male F 484 #> #> $counts[[16]] #> age.factor age.factor2 n #> 1 <40 years <60 years 70 #> 2 40-59 years <60 years 344 #> 3 60+ years 60+ years 515 #> #> $counts[[17]] #> loccomp loccomp.factor n #> 1 0 No 616 #> 2 1 Yes 293 #> 3 NA <NA> 20 #> #> $counts[[18]] #> mort_5yr mort_5yr.num n #> 1 Alive 1 511 #> 2 Died 2 404 #> 3 <NA> NA 14 #> #> $counts[[19]] #> sex.factor2 age.factor2 n #> 1 M <60 years 204 #> 2 M 60+ years 241 #> 3 F <60 years 210 #> 4 F 60+ years 274 #> #>
# Select a tibble and expand out$counts[[9]]
#> nodes node4 n #> 1 0 0 2 #> 2 1 0 269 #> 3 1 1 5 #> 4 2 0 194 #> 5 3 0 124 #> 6 3 1 1 #> 7 4 0 81 #> 8 4 1 3 #> 9 5 0 1 #> 10 5 1 45 #> 11 6 1 43 #> 12 7 1 38 #> 13 8 0 1 #> 14 8 1 22 #> 15 9 0 1 #> 16 9 1 19 #> 17 10 1 13 #> 18 11 1 10 #> 19 12 1 11 #> 20 13 1 7 #> 21 14 1 4 #> 22 15 1 6 #> 23 16 1 1 #> 24 17 1 2 #> 25 19 1 2 #> 26 20 1 2 #> 27 22 1 1 #> 28 24 1 1 #> 29 27 1 1 #> 30 33 1 1 #> 31 NA 0 1 #> 32 NA 1 17
# Note this variable (node4) appears miscoded in original dataset survival::colon. # Choose to only include variables that you actually use. # This uses standard Finalfit grammar. dependent = "mort_5yr" explanatory = c("age.factor2", "sex.factor2") colon_s_small %>% check_recode(dependent, explanatory)
#> $index #> # A tibble: 4 x 2 #> var1 var2 #> <chr> <chr> #> 1 mort_5yr mort_5yr.num #> 2 age.factor2 age.factor #> 3 sex.factor2 sex.factor #> 4 sex.factor2 age.factor2 #> #> $counts #> $counts[[1]] #> mort_5yr mort_5yr.num n #> 1 Alive 1 511 #> 2 Died 2 404 #> 3 <NA> NA 14 #> #> $counts[[2]] #> age.factor2 age.factor n #> 1 <60 years <40 years 70 #> 2 <60 years 40-59 years 344 #> 3 60+ years 60+ years 515 #> #> $counts[[3]] #> sex.factor2 sex.factor n #> 1 M Female 445 #> 2 F Male 484 #> #> $counts[[4]] #> sex.factor2 age.factor2 n #> 1 M <60 years 204 #> 2 M 60+ years 241 #> 3 F <60 years 210 #> 4 F 60+ years 274 #> #>