library("ExperimentHub")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colMeans, colSums, colnames,
## dirname, do.call, duplicated, eval, evalq, get, grep, grepl,
## intersect, is.unsorted, lapply, lengths, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind,
## rowMeans, rowSums, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: AnnotationHub
library("SummarizedExperiment")
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:ExperimentHub':
##
## cache
## The following object is masked from 'package:AnnotationHub':
##
## cache
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply
library("ggplot2")
The CLLmethylation resource contains the complete DNA methylation data for chronic lymphocytic leukemia (CLL) patient samples. A subset of these data (only the most variable CpG sites), together with the remaining datasets and analyses resulting from the PACE project, is available in BloodCancerMultiOmics2017. All of the data mentioned above were used in the analysis whose results are published in:
S Dietrich*, M Oleś*, J Lu* et al. Drug-perturbation-based stratification of blood cancer.
J. Clin. Invest. (2018); 128(1):427–445. doi:10.1172/JCI93801.
* equal contribution
The raw data from 450k DNA methylation arrays is stored in the European Genome-Phenome Archive (EGA) under accession number EGAS0000100174.
This dataset, in combination with the BloodCancerMultiOmics2017 package, provides a rich resource for nearly 200 primary CLL samples. Here we show a simple principal component analysis of the DNA methylation data.
Obtain the data.
# Create the ExperimentHub object used below to look up and retrieve resources.
eh <- ExperimentHub()
## snapshotDate(): 2018-10-31
# List the hub records that match "CLLmethylation" (printed, not stored).
query(eh, "CLLmethylation")
## ExperimentHub with 1 record
## # snapshotDate(): 2018-10-31
## # names(): EH1071
## # package(): CLLmethylation
## # $dataprovider: European Molecular Biology Laboratory
## # $species: Homo sapiens
## # $rdataclass: RangedSummarizedExperiment
## # $rdatadateadded: 2018-02-02
## # $title: DNA methylation data of CLL primary samples
## # $description: The data was produced with the use of 450k and 850k methy...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: IDAT
## # $sourceurl: https://wwwdev.ebi.ac.uk/ega/datasets/EGAD00010000948
## # $sourcesize: NA
## # $tags: c("ExperimentData", "DiseaseModel", "CancerData",
## # "LeukemiaCancerData")
## # retrieve record with 'object[["EH1071"]]'
# Retrieve hub record EH1071, which holds the CLL DNA methylation data.
meth <- eh[["EH1071"]]
## see ?CLLmethylation and browseVignettes('CLLmethylation') for documentation
## downloading 0 resources
## loading from cache
## '/home/biocbuild//.ExperimentHub/1071'
Subset the most variable CpG sites.
# Transpose the assay so that the CpG sites end up in the columns; the
# per-column variance below is then a per-site variance.
methData <- t(assay(meth))
# Filter to only include the `ntop` most variable sites.
ntop <- 5000
cpgVariance <- apply(methData, 2, var, na.rm = TRUE)
# head() caps the selection at the number of available columns, whereas
# indexing with [1:ntop] produces NA indices (and a "subscript out of bounds"
# error) when the matrix has fewer than `ntop` columns.
topCpGs <- head(order(cpgVariance, decreasing = TRUE), ntop)
# drop = FALSE keeps a matrix even if only a single site is selected.
methData <- methData[, topCpGs, drop = FALSE]
Perform principal component analysis.
# Principal component analysis of the filtered methylation matrix:
# columns are centered but not rescaled to unit variance.
pcaMeth <- prcomp(
  methData,
  center = TRUE,
  scale. = FALSE
)
Summary of components.
# Print, for every component, its standard deviation and the (cumulative)
# proportion of variance it explains.
summary(pcaMeth)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 9.797 4.32470 3.7835 3.31975 2.70291 2.48083 2.38076
## Proportion of Variance 0.189 0.03684 0.0282 0.02171 0.01439 0.01212 0.01116
## Cumulative Proportion 0.189 0.22589 0.2541 0.27579 0.29018 0.30230 0.31347
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 2.29235 2.12396 1.99809 1.94241 1.85233 1.82499
## Proportion of Variance 0.01035 0.00889 0.00786 0.00743 0.00676 0.00656
## Cumulative Proportion 0.32382 0.33270 0.34057 0.34800 0.35476 0.36132
## PC14 PC15 PC16 PC17 PC18 PC19
## Standard deviation 1.78618 1.75348 1.72802 1.72478 1.69280 1.69024
## Proportion of Variance 0.00628 0.00606 0.00588 0.00586 0.00564 0.00563
## Cumulative Proportion 0.36760 0.37366 0.37954 0.38540 0.39104 0.39667
## PC20 PC21 PC22 PC23 PC24 PC25 PC26
## Standard deviation 1.67947 1.66557 1.65248 1.65090 1.6397 1.63538 1.6245
## Proportion of Variance 0.00556 0.00546 0.00538 0.00537 0.0053 0.00527 0.0052
## Cumulative Proportion 0.40222 0.40769 0.41307 0.41843 0.4237 0.42900 0.4342
## PC27 PC28 PC29 PC30 PC31 PC32
## Standard deviation 1.62047 1.61097 1.60339 1.59524 1.58948 1.58488
## Proportion of Variance 0.00517 0.00511 0.00506 0.00501 0.00498 0.00495
## Cumulative Proportion 0.43937 0.44448 0.44954 0.45456 0.45953 0.46448
## PC33 PC34 PC35 PC36 PC37 PC38
## Standard deviation 1.58002 1.57605 1.56581 1.56470 1.5608 1.55914
## Proportion of Variance 0.00492 0.00489 0.00483 0.00482 0.0048 0.00479
## Cumulative Proportion 0.46940 0.47429 0.47912 0.48394 0.4887 0.49353
## PC39 PC40 PC41 PC42 PC43 PC44
## Standard deviation 1.55632 1.54959 1.54240 1.53450 1.53243 1.52707
## Proportion of Variance 0.00477 0.00473 0.00469 0.00464 0.00463 0.00459
## Cumulative Proportion 0.49830 0.50303 0.50771 0.51235 0.51697 0.52157
## PC45 PC46 PC47 PC48 PC49 PC50
## Standard deviation 1.52616 1.52263 1.52003 1.5109 1.51022 1.50507
## Proportion of Variance 0.00459 0.00457 0.00455 0.0045 0.00449 0.00446
## Cumulative Proportion 0.52616 0.53072 0.53527 0.5398 0.54426 0.54872
## PC51 PC52 PC53 PC54 PC55 PC56
## Standard deviation 1.49904 1.49716 1.49182 1.48680 1.48313 1.47911
## Proportion of Variance 0.00443 0.00441 0.00438 0.00435 0.00433 0.00431
## Cumulative Proportion 0.55315 0.55756 0.56195 0.56630 0.57063 0.57494
## PC57 PC58 PC59 PC60 PC61 PC62
## Standard deviation 1.47646 1.47397 1.46902 1.46321 1.4605 1.45694
## Proportion of Variance 0.00429 0.00428 0.00425 0.00422 0.0042 0.00418
## Cumulative Proportion 0.57924 0.58352 0.58777 0.59198 0.5962 0.60037
## PC63 PC64 PC65 PC66 PC67 PC68
## Standard deviation 1.45493 1.44915 1.44814 1.44175 1.43938 1.43670
## Proportion of Variance 0.00417 0.00414 0.00413 0.00409 0.00408 0.00407
## Cumulative Proportion 0.60453 0.60867 0.61280 0.61690 0.62098 0.62504
## PC69 PC70 PC71 PC72 PC73 PC74
## Standard deviation 1.43360 1.42897 1.4252 1.42318 1.41449 1.41432
## Proportion of Variance 0.00405 0.00402 0.0040 0.00399 0.00394 0.00394
## Cumulative Proportion 0.62909 0.63311 0.6371 0.64110 0.64504 0.64898
## PC75 PC76 PC77 PC78 PC79 PC80
## Standard deviation 1.40982 1.40837 1.40315 1.40071 1.39952 1.39412
## Proportion of Variance 0.00391 0.00391 0.00388 0.00386 0.00386 0.00383
## Cumulative Proportion 0.65290 0.65680 0.66068 0.66455 0.66840 0.67223
## PC81 PC82 PC83 PC84 PC85 PC86
## Standard deviation 1.39254 1.39100 1.38728 1.38393 1.38267 1.37672
## Proportion of Variance 0.00382 0.00381 0.00379 0.00377 0.00377 0.00373
## Cumulative Proportion 0.67605 0.67986 0.68365 0.68743 0.69119 0.69492
## PC87 PC88 PC89 PC90 PC91 PC92
## Standard deviation 1.37320 1.37188 1.36688 1.36467 1.35839 1.35657
## Proportion of Variance 0.00371 0.00371 0.00368 0.00367 0.00363 0.00362
## Cumulative Proportion 0.69864 0.70235 0.70603 0.70969 0.71333 0.71695
## PC93 PC94 PC95 PC96 PC97 PC98
## Standard deviation 1.35419 1.35001 1.34718 1.34345 1.34137 1.33888
## Proportion of Variance 0.00361 0.00359 0.00357 0.00355 0.00354 0.00353
## Cumulative Proportion 0.72056 0.72415 0.72773 0.73128 0.73483 0.73836
## PC99 PC100 PC101 PC102 PC103 PC104
## Standard deviation 1.3328 1.33178 1.33021 1.32766 1.32044 1.31707
## Proportion of Variance 0.0035 0.00349 0.00349 0.00347 0.00343 0.00342
## Cumulative Proportion 0.7419 0.74535 0.74884 0.75231 0.75574 0.75916
## PC105 PC106 PC107 PC108 PC109 PC110
## Standard deviation 1.31605 1.31225 1.30600 1.30497 1.30237 1.30046
## Proportion of Variance 0.00341 0.00339 0.00336 0.00335 0.00334 0.00333
## Cumulative Proportion 0.76257 0.76596 0.76932 0.77268 0.77602 0.77935
## PC111 PC112 PC113 PC114 PC115 PC116
## Standard deviation 1.29975 1.29727 1.29250 1.29130 1.28839 1.28462
## Proportion of Variance 0.00333 0.00331 0.00329 0.00328 0.00327 0.00325
## Cumulative Proportion 0.78267 0.78599 0.78928 0.79256 0.79583 0.79908
## PC117 PC118 PC119 PC120 PC121 PC122 PC123
## Standard deviation 1.27665 1.2744 1.2738 1.27108 1.26869 1.26381 1.26235
## Proportion of Variance 0.00321 0.0032 0.0032 0.00318 0.00317 0.00315 0.00314
## Cumulative Proportion 0.80229 0.8055 0.8087 0.81187 0.81504 0.81819 0.82133
## PC124 PC125 PC126 PC127 PC128 PC129
## Standard deviation 1.26090 1.25997 1.25220 1.24923 1.24775 1.24538
## Proportion of Variance 0.00313 0.00313 0.00309 0.00307 0.00307 0.00305
## Cumulative Proportion 0.82446 0.82758 0.83067 0.83375 0.83681 0.83987
## PC130 PC131 PC132 PC133 PC134 PC135
## Standard deviation 1.24189 1.23873 1.23812 1.2339 1.22914 1.22539
## Proportion of Variance 0.00304 0.00302 0.00302 0.0030 0.00298 0.00296
## Cumulative Proportion 0.84290 0.84593 0.84895 0.8519 0.85492 0.85788
## PC136 PC137 PC138 PC139 PC140 PC141
## Standard deviation 1.22316 1.22287 1.2142 1.20960 1.20581 1.20109
## Proportion of Variance 0.00295 0.00295 0.0029 0.00288 0.00286 0.00284
## Cumulative Proportion 0.86083 0.86377 0.8667 0.86956 0.87242 0.87526
## PC142 PC143 PC144 PC145 PC146 PC147 PC148
## Standard deviation 1.19975 1.1932 1.1915 1.18824 1.18591 1.18275 1.18191
## Proportion of Variance 0.00284 0.0028 0.0028 0.00278 0.00277 0.00276 0.00275
## Cumulative Proportion 0.87810 0.8809 0.8837 0.88648 0.88925 0.89200 0.89475
## PC149 PC150 PC151 PC152 PC153 PC154
## Standard deviation 1.17804 1.17350 1.1703 1.16769 1.16514 1.16096
## Proportion of Variance 0.00273 0.00271 0.0027 0.00269 0.00267 0.00265
## Cumulative Proportion 0.89749 0.90020 0.9029 0.90558 0.90826 0.91091
## PC155 PC156 PC157 PC158 PC159 PC160
## Standard deviation 1.15883 1.15493 1.15319 1.1500 1.14370 1.14003
## Proportion of Variance 0.00264 0.00263 0.00262 0.0026 0.00258 0.00256
## Cumulative Proportion 0.91356 0.91618 0.91880 0.9214 0.92398 0.92654
## PC161 PC162 PC163 PC164 PC165 PC166
## Standard deviation 1.13519 1.13294 1.12480 1.12204 1.12100 1.11807
## Proportion of Variance 0.00254 0.00253 0.00249 0.00248 0.00248 0.00246
## Cumulative Proportion 0.92908 0.93161 0.93410 0.93658 0.93906 0.94152
## PC167 PC168 PC169 PC170 PC171 PC172
## Standard deviation 1.11314 1.10881 1.10253 1.09688 1.09550 1.09146
## Proportion of Variance 0.00244 0.00242 0.00239 0.00237 0.00236 0.00235
## Cumulative Proportion 0.94396 0.94638 0.94878 0.95115 0.95351 0.95586
## PC173 PC174 PC175 PC176 PC177 PC178
## Standard deviation 1.08897 1.08206 1.0800 1.07744 1.06860 1.06076
## Proportion of Variance 0.00234 0.00231 0.0023 0.00229 0.00225 0.00222
## Cumulative Proportion 0.95819 0.96050 0.9628 0.96508 0.96733 0.96955
## PC179 PC180 PC181 PC182 PC183 PC184
## Standard deviation 1.04951 1.04362 1.03643 1.02953 1.02162 1.0074
## Proportion of Variance 0.00217 0.00215 0.00212 0.00209 0.00206 0.0020
## Cumulative Proportion 0.97172 0.97386 0.97598 0.97807 0.98012 0.9821
## PC185 PC186 PC187 PC188 PC189 PC190
## Standard deviation 1.00170 0.98416 0.97791 0.96693 0.95399 0.95055
## Proportion of Variance 0.00198 0.00191 0.00188 0.00184 0.00179 0.00178
## Cumulative Proportion 0.98410 0.98600 0.98789 0.98973 0.99152 0.99330
## PC191 PC192 PC193 PC194 PC195 PC196
## Standard deviation 0.93319 0.9283 0.90959 0.89557 0.19769 4.351e-15
## Proportion of Variance 0.00172 0.0017 0.00163 0.00158 0.00008 0.000e+00
## Cumulative Proportion 0.99502 0.9967 0.99834 0.99992 1.00000 1.000e+00
Visualize the components.
# Collect the principal component scores in a data frame for plotting.
tmp <- data.frame(pcaMeth$x)
# Scatter plot of the first two principal components.
ggplot(data = tmp, mapping = aes(x = PC1, y = PC2)) +
  geom_point() +
  theme_bw()
# Print the R version, platform and package versions of this session.
sessionInfo()
## R version 3.5.1 Patched (2018-07-12 r74967)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.5 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.8-bioc/R/lib/libRblas.so
## LAPACK: /home/biocbuild/bbs-3.8-bioc/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] CLLmethylation_1.2.0 ggplot2_3.1.0
## [3] SummarizedExperiment_1.12.0 DelayedArray_0.8.0
## [5] BiocParallel_1.16.0 matrixStats_0.54.0
## [7] Biobase_2.42.0 GenomicRanges_1.34.0
## [9] GenomeInfoDb_1.18.0 IRanges_2.16.0
## [11] S4Vectors_0.20.0 ExperimentHub_1.8.0
## [13] AnnotationHub_2.14.0 BiocGenerics_0.28.0
## [15] BiocStyle_2.10.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.19 lattice_0.20-35
## [3] assertthat_0.2.0 rprojroot_1.3-2
## [5] digest_0.6.18 mime_0.6
## [7] R6_2.3.0 plyr_1.8.4
## [9] backports_1.1.2 RSQLite_2.1.1
## [11] evaluate_0.12 httr_1.3.1
## [13] pillar_1.3.0 zlibbioc_1.28.0
## [15] rlang_0.3.0.1 curl_3.2
## [17] lazyeval_0.2.1 blob_1.1.1
## [19] Matrix_1.2-14 rmarkdown_1.10
## [21] labeling_0.3 stringr_1.3.1
## [23] RCurl_1.95-4.11 bit_1.1-14
## [25] munsell_0.5.0 shiny_1.1.0
## [27] compiler_3.5.1 httpuv_1.4.5
## [29] xfun_0.4 pkgconfig_2.0.2
## [31] htmltools_0.3.6 tidyselect_0.2.5
## [33] tibble_1.4.2 GenomeInfoDbData_1.2.0
## [35] interactiveDisplayBase_1.20.0 bookdown_0.7
## [37] withr_2.1.2 crayon_1.3.4
## [39] dplyr_0.7.7 later_0.7.5
## [41] bitops_1.0-6 grid_3.5.1
## [43] xtable_1.8-3 gtable_0.2.0
## [45] DBI_1.0.0 magrittr_1.5
## [47] scales_1.0.0 stringi_1.2.4
## [49] XVector_0.22.0 promises_1.0.1
## [51] bindrcpp_0.2.2 tools_3.5.1
## [53] bit64_0.9-7 glue_1.3.0
## [55] purrr_0.2.5 yaml_2.2.0
## [57] AnnotationDbi_1.44.0 colorspace_1.3-2
## [59] BiocManager_1.30.3 memoise_1.1.0
## [61] knitr_1.20 bindr_0.1.1