Differential Methylation Analysis with Whole Genome Bisulfite Sequencing (WGBS) Data in TCGA
Data can be extracted using
tcga_data <- eh[["EH1661"]]
TCGA_bs <- eh[["EH1662"]]
file.rename(from=tcga_data,to=paste0(dirname(tcga_data),'/assays.h5'))
Phenotypic data can be extracted by
Methylation Comparison between normal and tumor sample
cov_matrix <- bsseq::getCoverage(TCGA_bs)
meth_matrix <- bsseq::getCoverage(TCGA_bs, type='M')
meth_matrix <- meth_matrix/cov_matrix
# Get total CpG coverage
totCov <- colSums(cov_matrix>0)
# Restrict to CpGs with minimum read covergae of 10
meth_matrix[cov_matrix<10] <- NA
meanMethylation <- DelayedArray::colMeans(meth_matrix, na.rm=TRUE)
sampleType <- phenoData$sample.type
Df <- data.frame('mean-methylation' = meanMethylation, 'type' = sampleType)
g <- ggplot2::ggplot()
g <- g + ggplot2::geom_boxplot(data=Df,ggplot2::aes(x=type,y=mean.methylation))
g <- g + ggplot2::xlab('sample type') + ggplot2::ylab('Methylation')
g <- g + ggplot2::theme(axis.text.x = element_text(angle = 0, hjust = 1))
g
Differential methylation analysis of tumor samples
chr1Index <- seqnames(TCGA_bs) == 'chr22'
group1 <- c(11, 6, 23) # normal samples
group2 <- c(20, 26, 25) # Tumor samples
subSample <- c(group1, group2)
TCGA_bs_sub <- updateObject(TCGA_bs[chr1Index,subSample])
TCGA_bs_sub.fit <- bsseq::BSmooth(TCGA_bs_sub, mc.cores = 2, verbose = TRUE)
TCGA_bs_sub.tstat <- bsseq::BSmooth.tstat(TCGA_bs_sub.fit,
group1 = c(1,2,3),
group2 = c(4,5,6),
estimate.var = "group2",
local.correct = TRUE,
verbose = TRUE)
plot(TCGA_bs_sub.tstat)
dmrs0 <- bsseq::dmrFinder(TCGA_bs_sub.tstat, cutoff = c(-4.6, 4.6))
dmrs <- subset(dmrs0, n >= 3 & abs(meanDiff) >= 0.1)
nrow(dmrs)
head(dmrs, n = 3)
pData <- pData(TCGA_bs_sub.fit)
pData$col <- rep(c("red", "blue"), each = 3)
pData(TCGA_bs_sub.fit) <- pData
plotRegion(TCGA_bs_sub.fit, dmrs[1,], extend = 5000, addRegions = dmrs)
## R version 3.5.1 Patched (2018-07-12 r74967)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.5 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.8-bioc/R/lib/libRblas.so
## LAPACK: /home/biocbuild/bbs-3.8-bioc/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.5.1 backports_1.1.2 magrittr_1.5 rprojroot_1.3-2
## [5] htmltools_0.3.6 tools_3.5.1 yaml_2.2.0 Rcpp_0.12.19
## [9] stringi_1.2.4 rmarkdown_1.10 knitr_1.20 stringr_1.3.1
## [13] digest_0.6.18 evaluate_0.12