2022-09-27

Intro

  • Package: stRoke
  • Optimisation of functions
  • Visualising clinical data

New cpr_sex() -to- cpr_female()

## function (x) 
## {
##     last <- as.integer(substr(x, start = 11, stop = 11))
##     sex <- ifelse(last%%2 == 0, "female", "male")
##     return(sex)
## }
## <bytecode: 0x7fbbfd192940>
## <environment: namespace:daDoctoR>

(continued)

## function(cpr){
##   if (!is.vector(cpr)) stop("Input has to be vector") 
## 
##   x <- nchar(as.character(cpr)) # Formats as character to avoid confusions
##   
##   as.integer(substr(cpr, start = x, stop = x)) %% 2 == 0
## }
## <bytecode: 0x7fbbf90131b0>
## <environment: namespace:stRoke>

Testing vectorised version of cpr_female()

fsd<-sample(c("231045-0637", "010115-4000",
       "0101896000","010189-3000",
       "300450-1030","010150-4021"),
       size = 100,
       replace = T)
microbenchmark::microbenchmark(
  stRoke::cpr_female(fsd),
  daDoctoR::cpr_sex(fsd))
## Unit: microseconds
##                     expr     min       lq     mean   median      uq     max
##  stRoke::cpr_female(fsd) 118.917 119.8340 121.9624 120.5010 122.563 147.459
##   daDoctoR::cpr_sex(fsd) 130.043 132.0005 136.7349 133.0425 137.522 265.959
##  neval
##    100
##    100

daDoctoR::cpr_check

## function (cpr) 
## {
##     v <- c()
##     for (x in cpr) {
##         if (!substr(x, 7, 7) %in% c("-", ".")) {
##             x <- paste(substr(x, 1, 6), substr(x, 7, 10), collapse = "-")
##         }
##         p1 <- as.integer(substr(x, 1, 1))
##         p2 <- as.integer(substr(x, 2, 2))
##         p3 <- as.integer(substr(x, 3, 3))
##         p4 <- as.integer(substr(x, 4, 4))
##         p5 <- as.integer(substr(x, 5, 5))
##         p6 <- as.integer(substr(x, 6, 6))
##         p7 <- as.integer(substr(x, 8, 8))
##         p8 <- as.integer(substr(x, 9, 9))
##         p9 <- as.integer(substr(x, 10, 10))
##         p10 <- as.integer(substr(x, 11, 11))
##         v <- c(v, ifelse((p1 * 4 + p2 * 3 + p3 * 2 + p4 * 7 + 
##             p5 * 6 + p6 * 5 + p7 * 4 + p8 * 3 + p9 * 2 + p10)%%11 == 
##             0, "valid", "invalid"))
##     }
##     return(v)
## }
## <bytecode: 0x7fbbda4ad798>
## <environment: namespace:daDoctoR>

stRoke::cpr_check

## function(cpr){
##   # Check validity of CPR number, format ddmmyy-xxxx
##   # Build upon data from this document: https://cpr.dk/media/12066/personnummeret-i-cpr.pdf
##   ## OBS according to new description, not all valid CPR numbers apply to this modulus 11 rule.
##   message(
##     "OBS: according to new description, not all valid CPR numbers apply to this modulus 11 rule. 
##     Please refer to: https://cpr.dk/media/12066/personnummeret-i-cpr.pdf")
##   
##   str_length <- nchar(cpr) 
##   # Calculating length of each element in vector
##   
##   cpr_short <- paste0(substr(cpr,1,6),substr(cpr,str_length-3,str_length)) 
##   # Subsetting strings to first 6 and last 4 characters to short format cpr.
##   
##   cpr_matrix <- matrix(as.numeric(unlist(strsplit(cpr_short,""))),nrow=10)
##   # Splitting all strings by each character to list, unlisting and creating matrix. Default is by column.
##   
##   test_vector <- c(4,3,2,7,6,5,4,3,2,1)
##   # Multiplication vector from https://cpr.dk/media/12066/personnummeret-i-cpr.pdf
##   
##   colSums(cpr_matrix*test_vector) %% 11 == 0
##   # Testing if modulus 11 == 0 of sums of matrix * multiplication vector.
## }
## <bytecode: 0x7fbbda514f10>
## <environment: namespace:stRoke>

Testing vectorised version

microbenchmark::microbenchmark(
  stRoke::cpr_check(fsd),
  daDoctoR::cpr_check(fsd))
## Unit: microseconds
##                      expr      min       lq      mean   median        uq
##    stRoke::cpr_check(fsd)  670.084  680.980  731.0385  695.688  722.4385
##  daDoctoR::cpr_check(fsd) 1940.126 1975.605 2079.1628 1997.605 2055.0005
##       max neval
##  2571.167   100
##  4228.084   100

Clinical data

library(gtsummary)
theme_gtsummary_compact()
tbl_summary(df, by = "sex") |>
  add_overall()
Characteristic Overall, N = 6221 female, N = 2321 male, N = 3901
mfi_gen_bin_1 351 (56%) 152 (66%) 199 (51%)
mfi_phys_bin_1 322 (52%) 136 (59%) 186 (48%)
mfi_men_bin_1 163 (26%) 63 (27%) 100 (26%)
mfi_mot_bin_1 136 (22%) 55 (24%) 81 (21%)
mfi_act_bin_1 334 (54%) 125 (54%) 209 (54%)
who5_cut_1 178 (29%) 75 (32%) 103 (26%)
mdi_bin_1 34 (5.5%) 16 (6.9%) 18 (4.6%)
diabetes 58 (9.3%) 16 (6.9%) 42 (11%)
hypertension 328 (53%) 123 (53%) 205 (53%)
1 n (%)

Code - Fatigue, wellbeing and depression

cs <- viridis::viridis_pal(alpha = .2, begin = 0, end = 1, direction = 1, option = "D")
p <- plot(eulerr::euler(df|>transmute("General fatigue" = mfi_gen_bin_1,
                           "Decreased wellbeing" = who5_cut_1,
                           "Depressive symptoms" = mdi_bin_1),
           shape="ellipse"),
     quantities = TRUE,
     fill = cs,
     lty = 1:3,
     labels = list(font = 4))

Plot - Fatigue, wellbeing and depression

Code - Euler plot fatigue by sex

colnames(df)[1:5]<-c("general","physical","mental","motivation","activity")

fs <- list()
ls <- levels(factor(df$sex))
for (i in seq_along(ls)){
  fs[[i]] <- eulerr::euler(df|>filter(sex==ls[i])|>select(1:5),shape="ellipse")
  names(fs)[i]<-ls[i]
  }
ps<-list()

for (i in seq_along(ls)){
  ps[[i]] <- plot(fs[[i]],
     quantities = TRUE,
     fill = cs,
     lty = 1:5,
     labels = list(font = 4))
  names(ps)[i]<-ls[i]
}

Plots - Euler plot fatigue by sex

plot_grid(ps[[1]],ps[[2]],ncol=2,labels=ls)

Get the package

remotes::install_github(“agdamsbo/stRoke”)