A Split-Apply-Combine strategy to parallelize the evaluation of a function on each SNP, independently.
snp_split(infos.chr, FUN, combine, ncores = 1, ...)
Vector of integers specifying each SNP's chromosome.
Typically <bigSNP>$map$chromosome.
The function to be applied. It must take a
FBM.code256 as its first argument and ind.chr
as another argument to provide subsetting over SNPs.
You can access the number of the chromosome by using attr(ind.chr, "chr").
Function that is used by foreach::foreach to process the task results as they are generated. This can be specified as either a function or a non-empty character string naming the function. Specifying 'c' is useful for concatenating the results into a vector, for example. The values 'cbind' and 'rbind' can combine vectors into a matrix. The values '+' and '*' can be used to process numeric data. By default, the results are returned in a list.
Number of cores used. Default doesn't use parallelism.
You may use bigstatsr::nb_cores().
Extra arguments to be passed to FUN.
The result of foreach::foreach.
This function splits indices for each chromosome, then applies a given function to each part (chromosome) and finally combines the results.
# parallelize over chromosomes made easy
# examples of functions from this package
snp_pruning
#> function (G, infos.chr, ind.row = rows_along(G), size = 49, is.size.in.bp = FALSE,
#> infos.pos = NULL, thr.r2 = 0.2, exclude = NULL, nploidy = 2,
#> ncores = 1)
#> {
#> stop2("Pruning is deprecated; please use clumping (on MAF) instead..\n%s",
#> "See why at https://bit.ly/2uKo3MN.")
#> }
#> <bytecode: 0x000002020d214578>
#> <environment: namespace:bigsnpr>
snp_clumping
#> function (G, infos.chr, ind.row = rows_along(G), S = NULL, thr.r2 = 0.2,
#> size = 100/thr.r2, infos.pos = NULL, is.size.in.bp = NULL,
#> exclude = NULL, ncores = 1)
#> {
#> check_args()
#> assert_lengths(infos.chr, cols_along(G))
#> if (!missing(is.size.in.bp))
#> warning2("Parameter 'is.size.in.bp' is deprecated.")
#> if (!is.null(infos.pos))
#> assert_lengths(infos.pos, infos.chr)
#> if (!is.null(S))
#> assert_lengths(S, infos.chr)
#> ind.noexcl <- setdiff(seq_along(infos.chr), exclude)
#> sort(unlist(lapply(split(ind.noexcl, infos.chr[ind.noexcl]),
#> function(ind.chr) {
#> clumpingChr(G, S, ind.chr, ind.row, size, infos.pos,
#> thr.r2, ncores)
#> }), use.names = FALSE))
#> }
#> <bytecode: 0x000002020bfa4b68>
#> <environment: namespace:bigsnpr>
snp_fastImpute
#> function (Gna, infos.chr, alpha = 1e-04, size = 200, p.train = 0.8,
#> n.cor = nrow(Gna), seed = NA, ncores = 1)
#> {
#> check_args()
#> assert_lengths(infos.chr, cols_along(Gna))
#> assert_package("xgboost")
#> X <- Gna$copy(code = CODE_IMPUTE_LABEL)
#> X2 <- Gna$copy(code = CODE_IMPUTE_PRED)
#> infos.imp <- FBM_infos(Gna)
#> ind.chrs <- split(seq_along(infos.chr), infos.chr)
#> for (ind in ind.chrs) {
#> imputeChr(X, X2, infos.imp, ind, alpha, size, p.train,
#> n.cor, seed, ncores)
#> }
#> infos.imp
#> }
#> <bytecode: 0x000002020d278ec0>
#> <environment: namespace:bigsnpr>