library("ExperimentHub")
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
##     as.data.frame, basename, cbind, colMeans, colSums, colnames,
##     dirname, do.call, duplicated, eval, evalq, get, grep, grepl,
##     intersect, is.unsorted, lapply, lengths, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind,
##     rowMeans, rowSums, rownames, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: AnnotationHub
library("SummarizedExperiment")
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:ExperimentHub':
## 
##     cache
## The following object is masked from 'package:AnnotationHub':
## 
##     cache
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## Loading required package: BiocParallel
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
## 
##     aperm, apply
library("ggplot2")

1 Introduction

The CLLmethylation resource contains the complete DNA methylation data for chronic lymphocytic leukemia (CLL) patient samples. A subset of this data (only the most variable CpG sites), together with the remaining datasets and analyses from the PACE project, is available in BloodCancerMultiOmics2017. All of the data mentioned above was used in the analysis whose results are reported in:

S Dietrich*, M Oleś*, J Lu* et al. Drug-perturbation-based stratification of blood cancer
J. Clin. Invest. (2018); 128(1):427–445. doi:10.1172/JCI93801.

* equal contribution

The raw data from the 450k DNA methylation arrays is stored in the European Genome-Phenome Archive (EGA) under accession number EGAS00001001746.

2 Example of analysis

This dataset, in combination with the BloodCancerMultiOmics2017 package, constitutes a rich resource for nearly 200 primary CLL samples. Here we show a simple principal component analysis of the DNA methylation data.

Obtain the data.

# Create the ExperimentHub object that is queried here and used for retrieval below
eh = ExperimentHub()
## snapshotDate(): 2018-10-31
# List the hub records matching "CLLmethylation"
query(eh, "CLLmethylation")
## ExperimentHub with 1 record
## # snapshotDate(): 2018-10-31 
## # names(): EH1071
## # package(): CLLmethylation
## # $dataprovider: European Molecular Biology Laboratory
## # $species: Homo sapiens
## # $rdataclass: RangedSummarizedExperiment
## # $rdatadateadded: 2018-02-02
## # $title: DNA methylation data of CLL primary samples
## # $description: The data was produced with the use of 450k and 850k methy...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: IDAT
## # $sourceurl: https://wwwdev.ebi.ac.uk/ega/datasets/EGAD00010000948
## # $sourcesize: NA
## # $tags: c("ExperimentData", "DiseaseModel", "CancerData",
## #   "LeukemiaCancerData") 
## # retrieve record with 'object[["EH1071"]]'
meth = eh[["EH1071"]] # retrieve record EH1071 (the methylation data; a RangedSummarizedExperiment per the query output)
## see ?CLLmethylation and browseVignettes('CLLmethylation') for documentation
## downloading 0 resources
## loading from cache 
##     '/home/biocbuild//.ExperimentHub/1071'

Subset most variable CpG sites.

# Transpose the assay so that CpG sites become columns (the variables of the
# PCA) and samples become rows (the observations).
methData = t(assay(meth))
# Keep only the `ntop` most variable CpG sites.
ntop = 5000
# Per-site variance across samples; missing measurements are ignored.
siteVar = apply(methData, 2, var, na.rm=TRUE)
# head() caps the selection at the number of available sites, so a matrix with
# fewer than `ntop` columns no longer produces NA indices (which would make the
# subsetting fail); drop=FALSE keeps the result a matrix even for one column.
methData = methData[, head(order(siteVar, decreasing=TRUE), ntop), drop=FALSE]

Perform principal component analysis.

# PCA of the filtered methylation matrix: each column is centred on its mean,
# but columns are not rescaled to unit variance.
pcaMeth = prcomp(
  x = methData,
  scale. = FALSE,
  center = TRUE
)

Summary of components.

summary(pcaMeth)
## Importance of components:
##                          PC1     PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     9.797 4.32470 3.7835 3.31975 2.70291 2.48083 2.38076
## Proportion of Variance 0.189 0.03684 0.0282 0.02171 0.01439 0.01212 0.01116
## Cumulative Proportion  0.189 0.22589 0.2541 0.27579 0.29018 0.30230 0.31347
##                            PC8     PC9    PC10    PC11    PC12    PC13
## Standard deviation     2.29235 2.12396 1.99809 1.94241 1.85233 1.82499
## Proportion of Variance 0.01035 0.00889 0.00786 0.00743 0.00676 0.00656
## Cumulative Proportion  0.32382 0.33270 0.34057 0.34800 0.35476 0.36132
##                           PC14    PC15    PC16    PC17    PC18    PC19
## Standard deviation     1.78618 1.75348 1.72802 1.72478 1.69280 1.69024
## Proportion of Variance 0.00628 0.00606 0.00588 0.00586 0.00564 0.00563
## Cumulative Proportion  0.36760 0.37366 0.37954 0.38540 0.39104 0.39667
##                           PC20    PC21    PC22    PC23   PC24    PC25   PC26
## Standard deviation     1.67947 1.66557 1.65248 1.65090 1.6397 1.63538 1.6245
## Proportion of Variance 0.00556 0.00546 0.00538 0.00537 0.0053 0.00527 0.0052
## Cumulative Proportion  0.40222 0.40769 0.41307 0.41843 0.4237 0.42900 0.4342
##                           PC27    PC28    PC29    PC30    PC31    PC32
## Standard deviation     1.62047 1.61097 1.60339 1.59524 1.58948 1.58488
## Proportion of Variance 0.00517 0.00511 0.00506 0.00501 0.00498 0.00495
## Cumulative Proportion  0.43937 0.44448 0.44954 0.45456 0.45953 0.46448
##                           PC33    PC34    PC35    PC36   PC37    PC38
## Standard deviation     1.58002 1.57605 1.56581 1.56470 1.5608 1.55914
## Proportion of Variance 0.00492 0.00489 0.00483 0.00482 0.0048 0.00479
## Cumulative Proportion  0.46940 0.47429 0.47912 0.48394 0.4887 0.49353
##                           PC39    PC40    PC41    PC42    PC43    PC44
## Standard deviation     1.55632 1.54959 1.54240 1.53450 1.53243 1.52707
## Proportion of Variance 0.00477 0.00473 0.00469 0.00464 0.00463 0.00459
## Cumulative Proportion  0.49830 0.50303 0.50771 0.51235 0.51697 0.52157
##                           PC45    PC46    PC47   PC48    PC49    PC50
## Standard deviation     1.52616 1.52263 1.52003 1.5109 1.51022 1.50507
## Proportion of Variance 0.00459 0.00457 0.00455 0.0045 0.00449 0.00446
## Cumulative Proportion  0.52616 0.53072 0.53527 0.5398 0.54426 0.54872
##                           PC51    PC52    PC53    PC54    PC55    PC56
## Standard deviation     1.49904 1.49716 1.49182 1.48680 1.48313 1.47911
## Proportion of Variance 0.00443 0.00441 0.00438 0.00435 0.00433 0.00431
## Cumulative Proportion  0.55315 0.55756 0.56195 0.56630 0.57063 0.57494
##                           PC57    PC58    PC59    PC60   PC61    PC62
## Standard deviation     1.47646 1.47397 1.46902 1.46321 1.4605 1.45694
## Proportion of Variance 0.00429 0.00428 0.00425 0.00422 0.0042 0.00418
## Cumulative Proportion  0.57924 0.58352 0.58777 0.59198 0.5962 0.60037
##                           PC63    PC64    PC65    PC66    PC67    PC68
## Standard deviation     1.45493 1.44915 1.44814 1.44175 1.43938 1.43670
## Proportion of Variance 0.00417 0.00414 0.00413 0.00409 0.00408 0.00407
## Cumulative Proportion  0.60453 0.60867 0.61280 0.61690 0.62098 0.62504
##                           PC69    PC70   PC71    PC72    PC73    PC74
## Standard deviation     1.43360 1.42897 1.4252 1.42318 1.41449 1.41432
## Proportion of Variance 0.00405 0.00402 0.0040 0.00399 0.00394 0.00394
## Cumulative Proportion  0.62909 0.63311 0.6371 0.64110 0.64504 0.64898
##                           PC75    PC76    PC77    PC78    PC79    PC80
## Standard deviation     1.40982 1.40837 1.40315 1.40071 1.39952 1.39412
## Proportion of Variance 0.00391 0.00391 0.00388 0.00386 0.00386 0.00383
## Cumulative Proportion  0.65290 0.65680 0.66068 0.66455 0.66840 0.67223
##                           PC81    PC82    PC83    PC84    PC85    PC86
## Standard deviation     1.39254 1.39100 1.38728 1.38393 1.38267 1.37672
## Proportion of Variance 0.00382 0.00381 0.00379 0.00377 0.00377 0.00373
## Cumulative Proportion  0.67605 0.67986 0.68365 0.68743 0.69119 0.69492
##                           PC87    PC88    PC89    PC90    PC91    PC92
## Standard deviation     1.37320 1.37188 1.36688 1.36467 1.35839 1.35657
## Proportion of Variance 0.00371 0.00371 0.00368 0.00367 0.00363 0.00362
## Cumulative Proportion  0.69864 0.70235 0.70603 0.70969 0.71333 0.71695
##                           PC93    PC94    PC95    PC96    PC97    PC98
## Standard deviation     1.35419 1.35001 1.34718 1.34345 1.34137 1.33888
## Proportion of Variance 0.00361 0.00359 0.00357 0.00355 0.00354 0.00353
## Cumulative Proportion  0.72056 0.72415 0.72773 0.73128 0.73483 0.73836
##                          PC99   PC100   PC101   PC102   PC103   PC104
## Standard deviation     1.3328 1.33178 1.33021 1.32766 1.32044 1.31707
## Proportion of Variance 0.0035 0.00349 0.00349 0.00347 0.00343 0.00342
## Cumulative Proportion  0.7419 0.74535 0.74884 0.75231 0.75574 0.75916
##                          PC105   PC106   PC107   PC108   PC109   PC110
## Standard deviation     1.31605 1.31225 1.30600 1.30497 1.30237 1.30046
## Proportion of Variance 0.00341 0.00339 0.00336 0.00335 0.00334 0.00333
## Cumulative Proportion  0.76257 0.76596 0.76932 0.77268 0.77602 0.77935
##                          PC111   PC112   PC113   PC114   PC115   PC116
## Standard deviation     1.29975 1.29727 1.29250 1.29130 1.28839 1.28462
## Proportion of Variance 0.00333 0.00331 0.00329 0.00328 0.00327 0.00325
## Cumulative Proportion  0.78267 0.78599 0.78928 0.79256 0.79583 0.79908
##                          PC117  PC118  PC119   PC120   PC121   PC122   PC123
## Standard deviation     1.27665 1.2744 1.2738 1.27108 1.26869 1.26381 1.26235
## Proportion of Variance 0.00321 0.0032 0.0032 0.00318 0.00317 0.00315 0.00314
## Cumulative Proportion  0.80229 0.8055 0.8087 0.81187 0.81504 0.81819 0.82133
##                          PC124   PC125   PC126   PC127   PC128   PC129
## Standard deviation     1.26090 1.25997 1.25220 1.24923 1.24775 1.24538
## Proportion of Variance 0.00313 0.00313 0.00309 0.00307 0.00307 0.00305
## Cumulative Proportion  0.82446 0.82758 0.83067 0.83375 0.83681 0.83987
##                          PC130   PC131   PC132  PC133   PC134   PC135
## Standard deviation     1.24189 1.23873 1.23812 1.2339 1.22914 1.22539
## Proportion of Variance 0.00304 0.00302 0.00302 0.0030 0.00298 0.00296
## Cumulative Proportion  0.84290 0.84593 0.84895 0.8519 0.85492 0.85788
##                          PC136   PC137  PC138   PC139   PC140   PC141
## Standard deviation     1.22316 1.22287 1.2142 1.20960 1.20581 1.20109
## Proportion of Variance 0.00295 0.00295 0.0029 0.00288 0.00286 0.00284
## Cumulative Proportion  0.86083 0.86377 0.8667 0.86956 0.87242 0.87526
##                          PC142  PC143  PC144   PC145   PC146   PC147   PC148
## Standard deviation     1.19975 1.1932 1.1915 1.18824 1.18591 1.18275 1.18191
## Proportion of Variance 0.00284 0.0028 0.0028 0.00278 0.00277 0.00276 0.00275
## Cumulative Proportion  0.87810 0.8809 0.8837 0.88648 0.88925 0.89200 0.89475
##                          PC149   PC150  PC151   PC152   PC153   PC154
## Standard deviation     1.17804 1.17350 1.1703 1.16769 1.16514 1.16096
## Proportion of Variance 0.00273 0.00271 0.0027 0.00269 0.00267 0.00265
## Cumulative Proportion  0.89749 0.90020 0.9029 0.90558 0.90826 0.91091
##                          PC155   PC156   PC157  PC158   PC159   PC160
## Standard deviation     1.15883 1.15493 1.15319 1.1500 1.14370 1.14003
## Proportion of Variance 0.00264 0.00263 0.00262 0.0026 0.00258 0.00256
## Cumulative Proportion  0.91356 0.91618 0.91880 0.9214 0.92398 0.92654
##                          PC161   PC162   PC163   PC164   PC165   PC166
## Standard deviation     1.13519 1.13294 1.12480 1.12204 1.12100 1.11807
## Proportion of Variance 0.00254 0.00253 0.00249 0.00248 0.00248 0.00246
## Cumulative Proportion  0.92908 0.93161 0.93410 0.93658 0.93906 0.94152
##                          PC167   PC168   PC169   PC170   PC171   PC172
## Standard deviation     1.11314 1.10881 1.10253 1.09688 1.09550 1.09146
## Proportion of Variance 0.00244 0.00242 0.00239 0.00237 0.00236 0.00235
## Cumulative Proportion  0.94396 0.94638 0.94878 0.95115 0.95351 0.95586
##                          PC173   PC174  PC175   PC176   PC177   PC178
## Standard deviation     1.08897 1.08206 1.0800 1.07744 1.06860 1.06076
## Proportion of Variance 0.00234 0.00231 0.0023 0.00229 0.00225 0.00222
## Cumulative Proportion  0.95819 0.96050 0.9628 0.96508 0.96733 0.96955
##                          PC179   PC180   PC181   PC182   PC183  PC184
## Standard deviation     1.04951 1.04362 1.03643 1.02953 1.02162 1.0074
## Proportion of Variance 0.00217 0.00215 0.00212 0.00209 0.00206 0.0020
## Cumulative Proportion  0.97172 0.97386 0.97598 0.97807 0.98012 0.9821
##                          PC185   PC186   PC187   PC188   PC189   PC190
## Standard deviation     1.00170 0.98416 0.97791 0.96693 0.95399 0.95055
## Proportion of Variance 0.00198 0.00191 0.00188 0.00184 0.00179 0.00178
## Cumulative Proportion  0.98410 0.98600 0.98789 0.98973 0.99152 0.99330
##                          PC191  PC192   PC193   PC194   PC195     PC196
## Standard deviation     0.93319 0.9283 0.90959 0.89557 0.19769 4.351e-15
## Proportion of Variance 0.00172 0.0017 0.00163 0.00158 0.00008 0.000e+00
## Cumulative Proportion  0.99502 0.9967 0.99834 0.99992 1.00000 1.000e+00

Visualize the components.

# Put the PCA scores into a data frame (one column per component) for ggplot2
tmp = data.frame(pcaMeth$x)
# Scatter plot of the scores on the first two principal components
ggplot(tmp, aes(PC1, PC2)) +
  geom_point() +
  theme_bw()

3 End of session

sessionInfo()
## R version 3.5.1 Patched (2018-07-12 r74967)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.5 LTS
## 
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.8-bioc/R/lib/libRblas.so
## LAPACK: /home/biocbuild/bbs-3.8-bioc/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] CLLmethylation_1.2.0        ggplot2_3.1.0              
##  [3] SummarizedExperiment_1.12.0 DelayedArray_0.8.0         
##  [5] BiocParallel_1.16.0         matrixStats_0.54.0         
##  [7] Biobase_2.42.0              GenomicRanges_1.34.0       
##  [9] GenomeInfoDb_1.18.0         IRanges_2.16.0             
## [11] S4Vectors_0.20.0            ExperimentHub_1.8.0        
## [13] AnnotationHub_2.14.0        BiocGenerics_0.28.0        
## [15] BiocStyle_2.10.0           
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.19                  lattice_0.20-35              
##  [3] assertthat_0.2.0              rprojroot_1.3-2              
##  [5] digest_0.6.18                 mime_0.6                     
##  [7] R6_2.3.0                      plyr_1.8.4                   
##  [9] backports_1.1.2               RSQLite_2.1.1                
## [11] evaluate_0.12                 httr_1.3.1                   
## [13] pillar_1.3.0                  zlibbioc_1.28.0              
## [15] rlang_0.3.0.1                 curl_3.2                     
## [17] lazyeval_0.2.1                blob_1.1.1                   
## [19] Matrix_1.2-14                 rmarkdown_1.10               
## [21] labeling_0.3                  stringr_1.3.1                
## [23] RCurl_1.95-4.11               bit_1.1-14                   
## [25] munsell_0.5.0                 shiny_1.1.0                  
## [27] compiler_3.5.1                httpuv_1.4.5                 
## [29] xfun_0.4                      pkgconfig_2.0.2              
## [31] htmltools_0.3.6               tidyselect_0.2.5             
## [33] tibble_1.4.2                  GenomeInfoDbData_1.2.0       
## [35] interactiveDisplayBase_1.20.0 bookdown_0.7                 
## [37] withr_2.1.2                   crayon_1.3.4                 
## [39] dplyr_0.7.7                   later_0.7.5                  
## [41] bitops_1.0-6                  grid_3.5.1                   
## [43] xtable_1.8-3                  gtable_0.2.0                 
## [45] DBI_1.0.0                     magrittr_1.5                 
## [47] scales_1.0.0                  stringi_1.2.4                
## [49] XVector_0.22.0                promises_1.0.1               
## [51] bindrcpp_0.2.2                tools_3.5.1                  
## [53] bit64_0.9-7                   glue_1.3.0                   
## [55] purrr_0.2.5                   yaml_2.2.0                   
## [57] AnnotationDbi_1.44.0          colorspace_1.3-2             
## [59] BiocManager_1.30.3            memoise_1.1.0                
## [61] knitr_1.20                    bindr_0.1.1