Fast imputation via mode, mean, sampling according to allele frequencies, or 0.

snp_fastImputeSimple(
  Gna,
  method = c("mode", "mean0", "mean2", "random"),
  ncores = 1
)

Arguments

Gna

An FBM.code256 (typically <bigSNP>$genotypes).
These data may contain missing values.

method

One of "random" (sampling according to allele frequencies), "mean0" (mean rounded to the nearest integer), "mean2" (mean rounded to 2 decimal places), or "mode" (most frequent call; the default).

ncores

Number of cores used. Default doesn't use parallelism. You may use bigstatsr::nb_cores().

Value

A new FBM.code256 object (backed by the same file as Gna, but with a different code used to decode the stored values).

See also

snp_fastImpute()

Examples

bigsnp <- snp_attachExtdata("example-missing.bed")
G <- bigsnp$genotypes
G[, 2]  # some missing values
#>   [1] NA NA  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  1  0  1  0
#>  [26]  0  0  0  0  1  0  1  0  0  0  0  1  0  1  0  0  0  0  0  0  0  0  0  1  0
#>  [51]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
#>  [76]  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 NA
#> [101]  0  1  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0
#> [126]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0 NA  0
#> [151]  1  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  1  0  0
#> [176]  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  1  0  0
G2 <- snp_fastImputeSimple(G)
G2[, 2]  # no missing values anymore
#>   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1
#>  [38] 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#>  [75] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
#> [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
#> [149] 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
#> [186] 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0
G[, 2]  # the file now holds imputed values, but G's code still decodes them as NA
#>   [1] NA NA  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  1  0  1  0
#>  [26]  0  0  0  0  1  0  1  0  0  0  0  1  0  1  0  0  0  0  0  0  0  0  0  1  0
#>  [51]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
#>  [76]  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 NA
#> [101]  0  1  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0
#> [126]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0 NA  0
#> [151]  1  0  0  0  0  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  1  0  0
#> [176]  0  0  0  0  0  0 NA  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  1  0  0
G$copy(code = CODE_IMPUTE_PRED)[, 2]  # need to decode imputed values
#>   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1
#>  [38] 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#>  [75] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
#> [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
#> [149] 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
#> [186] 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0

G$copy(code = c(0, 1, 2, rep(0, 253)))[, 2]  # "imputation" by 0
#>   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1
#>  [38] 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#>  [75] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0
#> [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
#> [149] 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
#> [186] 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0