Compiled date: 2024-10-29
Last edited: 2024-01-21
License: GPL-3
To install the Bioconductor version of the POMA package, run the following code:
# Install BiocManager from CRAN only when it is not already available, so the
# next line does not fail on a fresh R installation.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Install the Bioconductor release of POMA.
BiocManager::install("POMA")
library(POMA)
library(ggtext)
library(magrittr)
The `POMA` package functions are organized into three sequential, distinct blocks: Data Preparation, Pre-processing, and Statistical Analysis.
The `SummarizedExperiment` package from Bioconductor offers well-defined computational data structures for representing various types of omics experiment data (Morgan et al. 2020). Utilizing these data structures can significantly improve data analysis. `POMA` leverages `SummarizedExperiment` objects, enhancing the reusability of existing methods for this class and contributing to more robust and reproducible workflows.
The workflow begins with either loading or creating a `SummarizedExperiment` object. Typically, your data might be stored in separate matrices and/or data frames. The `PomaCreateObject` function simplifies this step by quickly building a `SummarizedExperiment` object for you.
# create a SummarizedExperiment object from two separate data frames
# (the CSV file names below are placeholders for your own files)
target <- readr::read_csv("your_target.csv")
features <- readr::read_csv("your_features.csv")
# NOTE(review): `data` shares its name with the data() function called later
# in this document; that call still resolves to the function, but a more
# specific variable name would be clearer.
data <- PomaCreateObject(metadata = target, features = features)
Alternatively, if your data is already in a `SummarizedExperiment` object, you can proceed directly to the pre-processing step. This vignette uses example data provided in `POMA`.
# load the example dataset and print it to inspect its class and dimensions
data("st000336")
st000336
> class: SummarizedExperiment
> dim: 31 57
> metadata(0):
> assays(1): ''
> rownames(31): x1_methylhistidine x3_methylhistidine ... pyruvate
> succinate
> rowData names(0):
> colnames(57): 1 2 ... 56 57
> colData names(2): group steroids
# Impute missing values using the "knn" method. The zeros_as_na, remove_na and
# cutoff arguments control how zeros and features with missing values are
# treated (see ?PomaImpute for their exact meaning). On the example data this
# step removes one feature (31 -> 30 rows), as the message below reports.
imputed <- st000336 %>%
  PomaImpute(
    method = "knn",
    zeros_as_na = TRUE,
    remove_na = TRUE,
    cutoff = 20
  )
> 1 features removed.
imputed
> class: SummarizedExperiment
> dim: 30 57
> metadata(0):
> assays(1): ''
> rownames(30): x1_methylhistidine x3_methylhistidine ... pyruvate
> succinate
> rowData names(0):
> colnames(57): 1 2 ... 56 57
> colData names(2): group steroids
# Normalize the imputed data with the "log_pareto" method
# (see ?PomaNorm for the other available methods).
normalized <- imputed %>%
  PomaNorm(method = "log_pareto")
normalized
> class: SummarizedExperiment
> dim: 30 57
> metadata(0):
> assays(1): ''
> rownames(30): x1_methylhistidine x3_methylhistidine ... pyruvate
> succinate
> rowData names(0):
> colnames(57): 1 2 ... 56 57
> colData names(2): group steroids
# Visual check of the normalization effect: the same plot is drawn for the
# imputed (before) and normalized (after) objects so they can be compared.
PomaBoxplots(imputed, x = "samples") # data before normalization
PomaBoxplots(normalized, x = "samples") # data after normalization
PomaDensity(imputed, x = "features") # data before normalization
PomaDensity(normalized, x = "features") # data after normalization
# Run outlier detection once and reuse the result for both the plot and the
# cleaned data, instead of calling PomaOutliers() twice on the same input.
outlier_results <- PomaOutliers(normalized)
outlier_results$polygon_plot
# Keep the data element returned by PomaOutliers() for the downstream analysis;
# in this example it retains 52 of the 57 samples (see the printed object).
pre_processed <- outlier_results$data
pre_processed
> class: SummarizedExperiment
> dim: 30 52
> metadata(0):
> assays(1): ''
> rownames(30): x1_methylhistidine x3_methylhistidine ... pyruvate
> succinate
> rowData names(0):
> colnames(52): 1 2 ... 56 57
> colData names(2): group steroids
# pre_processed %>%
# PomaUnivariate(method = "ttest") %>%
# magrittr::extract2("result")
# imputed %>%
# PomaVolcano(pval = "adjusted", labels = TRUE)
# pre_processed %>%
# PomaUnivariate(method = "mann") %>%
# magrittr::extract2("result")
# PomaLimma(pre_processed, contrast = "Controls-DMD", adjust = "fdr")
# poma_pca <- PomaMultivariate(pre_processed, method = "pca")
# poma_pca$scoresplot +
# ggplot2::ggtitle("Scores Plot")
# poma_plsda <- PomaMultivariate(pre_processed, method = "plsda")
# poma_plsda$scoresplot +
# ggplot2::ggtitle("Scores Plot")
# poma_plsda$errors_plsda_plot +
# ggplot2::ggtitle("Error Plot")
# poma_cor <- PomaCorr(pre_processed, label_size = 8, coeff = 0.6)
# poma_cor$correlations
# poma_cor$corrplot
# poma_cor$graph
# PomaCorr(pre_processed, corr_type = "glasso", coeff = 0.6)$graph
# alpha = 1 for Lasso
# PomaLasso(pre_processed, alpha = 1, labels = TRUE)$coefficientPlot
# poma_rf <- PomaRandForest(pre_processed, ntest = 10, nvar = 10)
# poma_rf$error_tree
# poma_rf$confusionMatrix$table
# poma_rf$MeanDecreaseGini_plot
# Record the R version, platform and package versions used to build this
# document, for reproducibility.
sessionInfo()
> R version 4.4.1 (2024-06-14)
> Platform: x86_64-pc-linux-gnu
> Running under: Ubuntu 24.04.1 LTS
>
> Matrix products: default
> BLAS: /home/biocbuild/bbs-3.20-bioc/R/lib/libRblas.so
> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
>
> locale:
> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
> [3] LC_TIME=en_GB LC_COLLATE=C
> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
> [9] LC_ADDRESS=C LC_TELEPHONE=C
> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
>
> time zone: America/New_York
> tzcode source: system (glibc)
>
> attached base packages:
> [1] stats4 stats graphics grDevices utils datasets methods
> [8] base
>
> other attached packages:
> [1] magrittr_2.0.3 SummarizedExperiment_1.36.0
> [3] Biobase_2.66.0 GenomicRanges_1.58.0
> [5] GenomeInfoDb_1.42.0 IRanges_2.40.0
> [7] S4Vectors_0.44.0 BiocGenerics_0.52.0
> [9] MatrixGenerics_1.18.0 matrixStats_1.4.1
> [11] patchwork_1.3.0 ggtext_0.1.2
> [13] POMA_1.16.0 BiocStyle_2.34.0
>
> loaded via a namespace (and not attached):
> [1] gtable_0.3.6 impute_1.80.0 xfun_0.48
> [4] bslib_0.8.0 ggplot2_3.5.1 lattice_0.22-6
> [7] vctrs_0.6.5 tools_4.4.1 generics_0.1.3
> [10] parallel_4.4.1 tibble_3.2.1 fansi_1.0.6
> [13] cluster_2.1.6 highr_0.11 pkgconfig_2.0.3
> [16] Matrix_1.7-1 lifecycle_1.0.4 GenomeInfoDbData_1.2.13
> [19] stringr_1.5.1 compiler_4.4.1 farver_2.1.2
> [22] tinytex_0.53 munsell_0.5.1 permute_0.9-7
> [25] htmltools_0.5.8.1 sass_0.4.9 yaml_2.3.10
> [28] pillar_1.9.0 crayon_1.5.3 jquerylib_0.1.4
> [31] tidyr_1.3.1 MASS_7.3-61 cachem_1.1.0
> [34] DelayedArray_0.32.0 vegan_2.6-8 magick_2.8.5
> [37] abind_1.4-8 nlme_3.1-166 commonmark_1.9.2
> [40] tidyselect_1.2.1 digest_0.6.37 stringi_1.8.4
> [43] dplyr_1.1.4 purrr_1.0.2 bookdown_0.41
> [46] splines_4.4.1 labeling_0.4.3 fastmap_1.2.0
> [49] grid_4.4.1 colorspace_2.1-1 cli_3.6.3
> [52] SparseArray_1.6.0 S4Arrays_1.6.0 utf8_1.2.4
> [55] withr_3.0.2 scales_1.3.0 UCSC.utils_1.2.0
> [58] rmarkdown_2.28 XVector_0.46.0 httr_1.4.7
> [61] evaluate_1.0.1 knitr_1.48 viridisLite_0.4.2
> [64] mgcv_1.9-1 markdown_1.13 rlang_1.1.4
> [67] gridtext_0.1.5 Rcpp_1.0.13 glue_1.8.0
> [70] BiocManager_1.30.25 xml2_1.3.6 jsonlite_1.8.9
> [73] R6_2.5.1 zlibbioc_1.52.0
Morgan, Martin, Valerie Obenchain, Jim Hester, and Hervé Pagès. 2020. SummarizedExperiment: SummarizedExperiment Container. https://bioconductor.org/packages/SummarizedExperiment.