A Split-Apply-Combine strategy to parallelize the evaluation of a function on each SNP, independently.
snp_split(infos.chr, FUN, combine, ncores = 1, ...)
Vector of integers specifying each SNP's chromosome.
Typically <bigSNP>$map$chromosome
.
The function to be applied. It must take a
FBM.code256 as first argument and ind.chr
,
another argument that provides subsetting over SNPs.
You can access the number of the chromosome by using attr(ind.chr, "chr")
.
Function that is used by foreach::foreach to process the task results as they are generated. This can be specified as either a function or a non-empty character string naming the function. Specifying 'c' is useful for concatenating the results into a vector, for example. The values 'cbind' and 'rbind' can combine vectors into a matrix. The values '+' and '*' can be used to process numeric data. By default, the results are returned in a list.
Number of cores used. Default doesn't use parallelism.
You may use bigstatsr::nb_cores()
.
Extra arguments to be passed to FUN
.
The result of foreach::foreach.
This function splits indices for each chromosome, then applies a given function to each part (chromosome) and finally combines the results.
# parallelize over chromosomes made easy
# examples of functions from this package
snp_pruning
#> function (G, infos.chr, ind.row = rows_along(G), size = 49, is.size.in.bp = FALSE,
#> infos.pos = NULL, thr.r2 = 0.2, exclude = NULL, nploidy = 2,
#> ncores = 1)
#> {
#> stop2("Pruning is deprecated; please use clumping (on MAF) instead..\n%s",
#> "See why at https://bit.ly/2uKo3MN.")
#> }
#> <bytecode: 0x000001b46088c570>
#> <environment: namespace:bigsnpr>
snp_clumping
#> function (G, infos.chr, ind.row = rows_along(G), S = NULL, thr.r2 = 0.2,
#> size = 100/thr.r2, infos.pos = NULL, is.size.in.bp = NULL,
#> exclude = NULL, ncores = 1)
#> {
#> check_args()
#> if (!missing(is.size.in.bp))
#> warning2("Parameter 'is.size.in.bp' is deprecated.")
#> if (!is.null(S))
#> assert_lengths(infos.chr, S)
#> ind.noexcl <- setdiff(seq_along(infos.chr), exclude)
#> sort(unlist(lapply(split(ind.noexcl, infos.chr[ind.noexcl]),
#> function(ind.chr) {
#> clumpingChr(G, S, ind.chr, ind.row, size, infos.pos,
#> thr.r2, ncores)
#> }), use.names = FALSE))
#> }
#> <bytecode: 0x000001b466b21ec8>
#> <environment: namespace:bigsnpr>
snp_fastImpute
#> function (Gna, infos.chr, alpha = 1e-04, size = 200, p.train = 0.8,
#> n.cor = nrow(Gna), seed = NA, ncores = 1)
#> {
#> check_args(infos.chr = "assert_lengths(infos.chr, cols_along(Gna))")
#> assert_package("xgboost")
#> X <- Gna$copy(code = CODE_IMPUTE_LABEL)
#> X2 <- Gna$copy(code = CODE_IMPUTE_PRED)
#> infos.imp <- FBM_infos(Gna)
#> ind.chrs <- split(seq_along(infos.chr), infos.chr)
#> for (ind in ind.chrs) {
#> imputeChr(X, X2, infos.imp, ind, alpha, size, p.train,
#> n.cor, seed, ncores)
#> }
#> infos.imp
#> }
#> <bytecode: 0x000001b4604673c8>
#> <environment: namespace:bigsnpr>