This function was written a few days after the retraction of a paper in JAMA due to an error in recoding the treatment variable (https://jamanetwork.com/journals/jama/fullarticle/2752474). It takes a data frame or tibble, fuzzy matches variable names, and produces crosstables of all matched variables. A visual inspection of the crosstables should reveal any miscoding.
check_recode(
.data,
dependent = NULL,
explanatory = NULL,
include_numerics = TRUE,
...
)
Data frame or tibble.
Optional character vector: name(s) of dependent variable(s).
Optional character vector: name(s) of explanatory variable(s).
Logical. Include numeric variables in the check.
Pass other arguments to agrep.
List of length two. The first element is an index of variable combinations. The second is a nested list of crosstables as tibbles.
library(dplyr)
data(colon_s)
colon_s_small = colon_s %>%
select(-id, -rx, -rx.factor) %>%
mutate(
age.factor2 = forcats::fct_collapse(age.factor,
"<60 years" = c("<40 years", "40-59 years")),
sex.factor2 = forcats::fct_recode(sex.factor,
# Intentional miscode
"F" = "Male",
"M" = "Female")
)
# Check
colon_s_small %>%
check_recode(include_numerics = FALSE)
#> $index
#> # A tibble: 3 × 2
#> var1 var2
#> <chr> <chr>
#> 1 sex.factor sex.factor2
#> 2 age.factor age.factor2
#> 3 sex.factor2 age.factor2
#>
#> $counts
#> $counts[[1]]
#> sex.factor sex.factor2 n
#> 1 Female M 445
#> 2 Male F 484
#>
#> $counts[[2]]
#> age.factor age.factor2 n
#> 1 <40 years <60 years 70
#> 2 40-59 years <60 years 344
#> 3 60+ years 60+ years 515
#>
#> $counts[[3]]
#> sex.factor2 age.factor2 n
#> 1 M <60 years 204
#> 2 M 60+ years 241
#> 3 F <60 years 210
#> 4 F 60+ years 274
#>
#>
out = colon_s_small %>%
select(-extent, -extent.factor,-time, -time.years) %>%
check_recode()
out
#> $index
#> # A tibble: 19 × 2
#> var1 var2
#> <chr> <chr>
#> 1 sex sex.factor
#> 2 sex sex.factor2
#> 3 age age.factor
#> 4 age age.10
#> 5 age age.factor2
#> 6 obstruct obstruct.factor
#> 7 perfor perfor.factor
#> 8 adhere adhere.factor
#> 9 nodes node4
#> 10 nodes node4.factor
#> 11 status status.factor
#> 12 differ differ.factor
#> 13 surg surg.factor
#> 14 node4 node4.factor
#> 15 sex.factor sex.factor2
#> 16 age.factor age.factor2
#> 17 loccomp loccomp.factor
#> 18 mort_5yr mort_5yr.num
#> 19 sex.factor2 age.factor2
#>
#> $counts
#> $counts[[1]]
#> sex sex.factor n
#> 1 0 Female 445
#> 2 1 Male 484
#>
#> $counts[[2]]
#> sex sex.factor2 n
#> 1 0 M 445
#> 2 1 F 484
#>
#> $counts[[3]]
#> age age.factor n
#> 1 18 <40 years 1
#> 2 22 <40 years 1
#> 3 25 <40 years 1
#> 4 26 <40 years 1
#> 5 27 <40 years 3
#> 6 28 <40 years 1
#> 7 29 <40 years 1
#> 8 30 <40 years 5
#> 9 31 <40 years 2
#> 10 32 <40 years 5
#> 11 33 <40 years 7
#> 12 34 <40 years 4
#> 13 35 <40 years 2
#> 14 36 <40 years 10
#> 15 37 <40 years 2
#> 16 38 <40 years 10
#> 17 39 <40 years 14
#> 18 40 40-59 years 8
#> 19 41 40-59 years 7
#> 20 42 40-59 years 7
#> 21 43 40-59 years 11
#> 22 44 40-59 years 8
#> 23 45 40-59 years 13
#> 24 46 40-59 years 19
#> 25 47 40-59 years 12
#> 26 48 40-59 years 15
#> 27 49 40-59 years 13
#> 28 50 40-59 years 14
#> 29 51 40-59 years 10
#> 30 52 40-59 years 20
#> 31 53 40-59 years 22
#> 32 54 40-59 years 16
#> 33 55 40-59 years 27
#> 34 56 40-59 years 31
#> 35 57 40-59 years 31
#> 36 58 40-59 years 29
#> 37 59 40-59 years 31
#> 38 60 60+ years 31
#> 39 61 60+ years 36
#> 40 62 60+ years 21
#> 41 63 60+ years 29
#> 42 64 60+ years 36
#> 43 65 60+ years 28
#> 44 66 60+ years 35
#> 45 67 60+ years 24
#> 46 68 60+ years 38
#> 47 69 60+ years 20
#> 48 70 60+ years 36
#> 49 71 60+ years 24
#> 50 72 60+ years 25
#> 51 73 60+ years 20
#> 52 74 60+ years 34
#> 53 75 60+ years 17
#> 54 76 60+ years 21
#> 55 77 60+ years 11
#> 56 78 60+ years 5
#> 57 79 60+ years 7
#> 58 80 60+ years 8
#> 59 81 60+ years 5
#> 60 82 60+ years 2
#> 61 83 60+ years 1
#> 62 85 60+ years 1
#>
#> $counts[[4]]
#> age age.10 n
#> 1 18 1.8 1
#> 2 22 2.2 1
#> 3 25 2.5 1
#> 4 26 2.6 1
#> 5 27 2.7 3
#> 6 28 2.8 1
#> 7 29 2.9 1
#> 8 30 3.0 5
#> 9 31 3.1 2
#> 10 32 3.2 5
#> 11 33 3.3 7
#> 12 34 3.4 4
#> 13 35 3.5 2
#> 14 36 3.6 10
#> 15 37 3.7 2
#> 16 38 3.8 10
#> 17 39 3.9 14
#> 18 40 4.0 8
#> 19 41 4.1 7
#> 20 42 4.2 7
#> 21 43 4.3 11
#> 22 44 4.4 8
#> 23 45 4.5 13
#> 24 46 4.6 19
#> 25 47 4.7 12
#> 26 48 4.8 15
#> 27 49 4.9 13
#> 28 50 5.0 14
#> 29 51 5.1 10
#> 30 52 5.2 20
#> 31 53 5.3 22
#> 32 54 5.4 16
#> 33 55 5.5 27
#> 34 56 5.6 31
#> 35 57 5.7 31
#> 36 58 5.8 29
#> 37 59 5.9 31
#> 38 60 6.0 31
#> 39 61 6.1 36
#> 40 62 6.2 21
#> 41 63 6.3 29
#> 42 64 6.4 36
#> 43 65 6.5 28
#> 44 66 6.6 35
#> 45 67 6.7 24
#> 46 68 6.8 38
#> 47 69 6.9 20
#> 48 70 7.0 36
#> 49 71 7.1 24
#> 50 72 7.2 25
#> 51 73 7.3 20
#> 52 74 7.4 34
#> 53 75 7.5 17
#> 54 76 7.6 21
#> 55 77 7.7 11
#> 56 78 7.8 5
#> 57 79 7.9 7
#> 58 80 8.0 8
#> 59 81 8.1 5
#> 60 82 8.2 2
#> 61 83 8.3 1
#> 62 85 8.5 1
#>
#> $counts[[5]]
#> age age.factor2 n
#> 1 18 <60 years 1
#> 2 22 <60 years 1
#> 3 25 <60 years 1
#> 4 26 <60 years 1
#> 5 27 <60 years 3
#> 6 28 <60 years 1
#> 7 29 <60 years 1
#> 8 30 <60 years 5
#> 9 31 <60 years 2
#> 10 32 <60 years 5
#> 11 33 <60 years 7
#> 12 34 <60 years 4
#> 13 35 <60 years 2
#> 14 36 <60 years 10
#> 15 37 <60 years 2
#> 16 38 <60 years 10
#> 17 39 <60 years 14
#> 18 40 <60 years 8
#> 19 41 <60 years 7
#> 20 42 <60 years 7
#> 21 43 <60 years 11
#> 22 44 <60 years 8
#> 23 45 <60 years 13
#> 24 46 <60 years 19
#> 25 47 <60 years 12
#> 26 48 <60 years 15
#> 27 49 <60 years 13
#> 28 50 <60 years 14
#> 29 51 <60 years 10
#> 30 52 <60 years 20
#> 31 53 <60 years 22
#> 32 54 <60 years 16
#> 33 55 <60 years 27
#> 34 56 <60 years 31
#> 35 57 <60 years 31
#> 36 58 <60 years 29
#> 37 59 <60 years 31
#> 38 60 60+ years 31
#> 39 61 60+ years 36
#> 40 62 60+ years 21
#> 41 63 60+ years 29
#> 42 64 60+ years 36
#> 43 65 60+ years 28
#> 44 66 60+ years 35
#> 45 67 60+ years 24
#> 46 68 60+ years 38
#> 47 69 60+ years 20
#> 48 70 60+ years 36
#> 49 71 60+ years 24
#> 50 72 60+ years 25
#> 51 73 60+ years 20
#> 52 74 60+ years 34
#> 53 75 60+ years 17
#> 54 76 60+ years 21
#> 55 77 60+ years 11
#> 56 78 60+ years 5
#> 57 79 60+ years 7
#> 58 80 60+ years 8
#> 59 81 60+ years 5
#> 60 82 60+ years 2
#> 61 83 60+ years 1
#> 62 85 60+ years 1
#>
#> $counts[[6]]
#> obstruct obstruct.factor n
#> 1 0 No 732
#> 2 1 Yes 176
#> 3 NA <NA> 21
#>
#> $counts[[7]]
#> perfor perfor.factor n
#> 1 0 No 902
#> 2 1 Yes 27
#>
#> $counts[[8]]
#> adhere adhere.factor n
#> 1 0 No 794
#> 2 1 Yes 135
#>
#> $counts[[9]]
#> nodes node4 n
#> 1 0 0 2
#> 2 1 0 269
#> 3 1 1 5
#> 4 2 0 194
#> 5 3 0 124
#> 6 3 1 1
#> 7 4 0 81
#> 8 4 1 3
#> 9 5 0 1
#> 10 5 1 45
#> 11 6 1 43
#> 12 7 1 38
#> 13 8 0 1
#> 14 8 1 22
#> 15 9 0 1
#> 16 9 1 19
#> 17 10 1 13
#> 18 11 1 10
#> 19 12 1 11
#> 20 13 1 7
#> 21 14 1 4
#> 22 15 1 6
#> 23 16 1 1
#> 24 17 1 2
#> 25 19 1 2
#> 26 20 1 2
#> 27 22 1 1
#> 28 24 1 1
#> 29 27 1 1
#> 30 33 1 1
#> 31 NA 0 1
#> 32 NA 1 17
#>
#> $counts[[10]]
#> nodes node4.factor n
#> 1 0 No 2
#> 2 1 No 269
#> 3 1 Yes 5
#> 4 2 No 194
#> 5 3 No 124
#> 6 3 Yes 1
#> 7 4 No 81
#> 8 4 Yes 3
#> 9 5 No 1
#> 10 5 Yes 45
#> 11 6 Yes 43
#> 12 7 Yes 38
#> 13 8 No 1
#> 14 8 Yes 22
#> 15 9 No 1
#> 16 9 Yes 19
#> 17 10 Yes 13
#> 18 11 Yes 10
#> 19 12 Yes 11
#> 20 13 Yes 7
#> 21 14 Yes 4
#> 22 15 Yes 6
#> 23 16 Yes 1
#> 24 17 Yes 2
#> 25 19 Yes 2
#> 26 20 Yes 2
#> 27 22 Yes 1
#> 28 24 Yes 1
#> 29 27 Yes 1
#> 30 33 Yes 1
#> 31 NA No 1
#> 32 NA Yes 17
#>
#> $counts[[11]]
#> status status.factor n
#> 1 0 Alive 477
#> 2 1 Died 452
#>
#> $counts[[12]]
#> differ differ.factor n
#> 1 1 Well 93
#> 2 2 Moderate 663
#> 3 3 Poor 150
#> 4 NA <NA> 23
#>
#> $counts[[13]]
#> surg surg.factor n
#> 1 0 Short 668
#> 2 1 Long 244
#> 3 NA <NA> 17
#>
#> $counts[[14]]
#> node4 node4.factor n
#> 1 0 No 674
#> 2 1 Yes 255
#>
#> $counts[[15]]
#> sex.factor sex.factor2 n
#> 1 Female M 445
#> 2 Male F 484
#>
#> $counts[[16]]
#> age.factor age.factor2 n
#> 1 <40 years <60 years 70
#> 2 40-59 years <60 years 344
#> 3 60+ years 60+ years 515
#>
#> $counts[[17]]
#> loccomp loccomp.factor n
#> 1 0 No 616
#> 2 1 Yes 293
#> 3 NA <NA> 20
#>
#> $counts[[18]]
#> mort_5yr mort_5yr.num n
#> 1 Alive 1 511
#> 2 Died 2 404
#> 3 <NA> NA 14
#>
#> $counts[[19]]
#> sex.factor2 age.factor2 n
#> 1 M <60 years 204
#> 2 M 60+ years 241
#> 3 F <60 years 210
#> 4 F 60+ years 274
#>
#>
# Select a tibble and expand
out$counts[[9]]
#> nodes node4 n
#> 1 0 0 2
#> 2 1 0 269
#> 3 1 1 5
#> 4 2 0 194
#> 5 3 0 124
#> 6 3 1 1
#> 7 4 0 81
#> 8 4 1 3
#> 9 5 0 1
#> 10 5 1 45
#> 11 6 1 43
#> 12 7 1 38
#> 13 8 0 1
#> 14 8 1 22
#> 15 9 0 1
#> 16 9 1 19
#> 17 10 1 13
#> 18 11 1 10
#> 19 12 1 11
#> 20 13 1 7
#> 21 14 1 4
#> 22 15 1 6
#> 23 16 1 1
#> 24 17 1 2
#> 25 19 1 2
#> 26 20 1 2
#> 27 22 1 1
#> 28 24 1 1
#> 29 27 1 1
#> 30 33 1 1
#> 31 NA 0 1
#> 32 NA 1 17
# Note this variable (node4) appears miscoded in original dataset survival::colon.
# Choose to only include variables that you actually use.
# This uses standard Finalfit grammar.
dependent = "mort_5yr"
explanatory = c("age.factor2", "sex.factor2")
colon_s_small %>%
check_recode(dependent, explanatory)
#> $index
#> # A tibble: 4 × 2
#> var1 var2
#> <chr> <chr>
#> 1 mort_5yr mort_5yr.num
#> 2 age.factor2 age.factor
#> 3 sex.factor2 sex.factor
#> 4 sex.factor2 age.factor2
#>
#> $counts
#> $counts[[1]]
#> mort_5yr mort_5yr.num n
#> 1 Alive 1 511
#> 2 Died 2 404
#> 3 <NA> NA 14
#>
#> $counts[[2]]
#> age.factor2 age.factor n
#> 1 <60 years <40 years 70
#> 2 <60 years 40-59 years 344
#> 3 60+ years 60+ years 515
#>
#> $counts[[3]]
#> sex.factor2 sex.factor n
#> 1 M Female 445
#> 2 F Male 484
#>
#> $counts[[4]]
#> sex.factor2 age.factor2 n
#> 1 M <60 years 204
#> 2 M 60+ years 241
#> 3 F <60 years 210
#> 4 F 60+ years 274
#>
#>