Introduction

This section contains the complete ELMER code for the following analysis:

  • Vignette example
  • BRCA Supervised analysis
  • BRCA Unsupervised analysis

Vignette example

Below is the complete code that was explained in the other sections.

library(MultiAssayExperiment)
library(ELMER.data)
library(ELMER)
# get distal probes that are 2kb away from TSS on chromosome 1
distal.probes <- get.feature.probe(
  genome = "hg19", 
  met.platform = "450K", 
  rm.chr = paste0("chr",c(2:22,"X","Y"))
)
data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp
data(LUSC_meth_refined,package = "ELMER.data") # Meth

mae <- createMAE(
  exp = GeneExp, 
  met = Meth,
  save = TRUE,
  linearize.exp = TRUE,
  save.filename = "mae.rda",
  filter.probes = distal.probes,
  met.platform = "450K",
  genome = "hg19",
  TCGA = TRUE
)

group.col <- "definition"
group1 <-  "Primary solid Tumor"
group2 <- "Solid Tissue Normal"
dir.out <- "result"
diff.dir <-  "hypo" # Search for hypomethylated probes in group 1

sig.diff <- get.diff.meth(
  data = mae, 
  group.col = group.col,
  group1 = group1,
  group2 = group2,
  minSubgroupFrac = 0.2,
  sig.dif = 0.3,
  diff.dir = diff.dir,
  cores = 1, 
  dir.out = dir.out,
  pvalue = 0.01
)


nearGenes <- GetNearGenes(
  data = mae, 
  probes = sig.diff$probe, 
  numFlankingGenes = 20
) # 10 upstream and 10 dowstream genes

pair <- get.pair(
  data = mae,
  group.col = group.col,
  group1 = group1,
  mode = "unsupervised",
  group2 = group2,
  nearGenes = nearGenes,
  diff.dir = diff.dir,
  minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
  permu.dir = file.path(dir.out,"permu"),
  permu.size = 100, # Please set to 100000 to get significant results
  raw.pvalue = 0.05,   
  Pe = 0.01, # Please set to 0.001 to get significant results
  filter.probes = TRUE, # See preAssociationProbeFiltering function
  filter.percentage = 0.05,
  filter.portion = 0.3,
  dir.out = dir.out,
  cores = 1,
  label = diff.dir
)

# Identify enriched motif for significantly hypomethylated probes which 
# have putative target genes.
enriched.motif <- get.enriched.motif(
  data = mae,
  probes = pair$Probe, 
  dir.out = dir.out, 
  label = diff.dir,
  min.incidence = 10,
  lower.OR = 1.1
)

TF <- get.TFs(
  data = mae, 
  mode = "unsupervised",
  group.col = group.col,
  group1 = group1,
  group2 = group2,
  enriched.motif = enriched.motif,
  dir.out = dir.out,
  cores = 1, 
  label = diff.dir
)

BRCA Unsupervised analysis

library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")

file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
if(file.exists(file)) {
  mae <- get(load(file))
} else {
  getTCGA(
    disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
    basedir = "DATA", # Where data will be downloaded
    genome  = "hg38"
  ) # Genome of refenrece "hg38" or "hg19"
  
  distal.probes <- get.feature.probe(
    feature = NULL,
    genome = "hg38", 
    met.platform = "450K"
  )
  
  
  mae <- createMAE(
    exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda", 
    met = "~/paper_elmer/Data/BRCA/BRCA_meth_hg38.rda", 
    met.platform = "450K",
    genome = "hg38",
    linearize.exp = TRUE,
    filter.probes = distal.probes,
    met.na.cut = 0.2,
    save = FALSE,
    TCGA = TRUE
  ) 
  # Remove FFPE samples from the analysis
  mae <- mae[,!mae$is_ffpe]
  
  # Get molecular subytpe information from cell paper and more metadata (purity etc...)
  # https://doi.org/10.1016/j.cell.2015.09.033
  file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
  downloader::download(file, basename(file))
  subtypes <- readxl::read_excel(basename(file), skip = 2)
  
  subtypes$sample <- substr(subtypes$Methylation,1,16)
  meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
  meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
  meta.data <- S4Vectors::DataFrame(meta.data)
  rownames(meta.data) <- meta.data$sample
  stopifnot(all(meta.data$patient == colData(mae)$patient))
  colData(mae) <- meta.data
  save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
}
dir.out <- "BRCA_unsupervised_hg38/hypo"
cores <- 10
diff.probes <- get.diff.meth(
  data = mae, 
  group.col = "definition",
  group1 = "Primary solid Tumor",
  group2 = "Solid Tissue Normal",
  diff.dir = "hypo", # Get probes hypometh. in group 1 
  cores = cores,
  minSubgroupFrac = 0.2, # % group samples  used. 
  pvalue = 0.01, 
  sig.dif = 0.3,
  dir.out = dir.out,
  save = TRUE
)

# For each differently methylated probes we will get the 
# 20 nearby genes (10 downstream and 10 upstream)
nearGenes <- GetNearGenes(
  data = mae, 
  probes =  diff.probes$probe, 
  numFlankingGenes = 20
)

# This step is the most time consuming. Depending on the size of the groups
# and the number of probes found previously it migh take hours
Hypo.pair <- get.pair(
  data = mae,
  nearGenes = nearGenes,
  group.col = "definition",
  group1 = "Primary solid Tumor",
  group2 = "Solid Tissue Normal",
  permu.dir = paste0(dir.out,"/permu"),
  permu.size = 10000, 
  mode = "unsupervised",
  minSubgroupFrac = 0.4, # 40% of samples to create U and M
  raw.pvalue = 0.001,   
  Pe = 0.001, 
  filter.probes = TRUE,
  filter.percentage = 0.05,
  filter.portion = 0.3,
  dir.out = dir.out,
  cores = cores,
  label = "hypo"
)
# Number of pairs: 2950 


enriched.motif <- get.enriched.motif(
  data = mae,
  min.motif.quality = "DS",
  probes = unique(Hypo.pair$Probe), 
  dir.out = dir.out, 
  label = "hypo",
  min.incidence = 10,
  lower.OR = 1.1
)
TF <- get.TFs(
  data = mae, 
  group.col = "definition",
  group1 = "Primary solid Tumor",
  group2 = "Solid Tissue Normal",
  minSubgroupFrac = 0.4, # Set to 1 if supervised mode
  enriched.motif = enriched.motif,
  dir.out = dir.out, 
  cores = cores, 
  label = "hypo"
)

BRCA Supervised analysis

library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
#-----------------------------------
# 1 - Samples
# ----------------------------------
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")

file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
if(file.exists(file)) {
  mae <- get(load(file))
} else {
  getTCGA(
    disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
    basedir = "DATA", # Where data will be downloaded
    genome  = "hg38"
  ) # Genome of refenrece "hg38" or "hg19"
  
  distal.probes <- get.feature.probe(
    feature = NULL,
    genome = "hg38", 
    met.platform = "450K"
  ) 
  
  mae <- createMAE(
    exp = "DATA/BRCA/BRCA_RNA_hg38.rda", 
    met = "DATA/BRCA/BRCA_meth_hg38.rda", 
    met.platform = "450K",
    genome = "hg38",
    linearize.exp = TRUE,
    filter.probes = distal.probes,
    met.na.cut = 0.2,
    save = FALSE,
    TCGA = TRUE
  ) 
  # Remove FFPE samples from the analysis
  mae <- mae[,!mae$is_ffpe]
  
  # Get molecular subytpe information from cell paper and more metadata (purity etc...)
  # https://doi.org/10.1016/j.cell.2015.09.033
  file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
  downloader::download(file, basename(file))
  subtypes <- readxl::read_excel(basename(file), skip = 2)
  
  subtypes$sample <- substr(subtypes$Methylation,1,16)
  meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
  meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
  meta.data <- S4Vectors::DataFrame(meta.data)
  rownames(meta.data) <- meta.data$sample
  stopifnot(all(meta.data$patient == colData(mae)$patient))
  colData(mae) <- meta.data
  save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
}

cores <- 6
direction <- c( "hypo","hyper")
genome <- "hg38"
group.col  <- "PAM50"
groups <- t(combn(na.omit(unique(colData(mae)[,group.col])),2))
for(g in 1:nrow(groups)) {
  group1 <- groups[g,1]
  group2 <- groups[g,2]
  for (j in direction){
    tryCatch({
      message("Analysing probes ",j, "methylated in ", group1, " vs ", group2)
      dir.out <- paste0("BRCA_supervised_",genome,"/",group1,"_",group2,"/",j)
      dir.create(dir.out, recursive = TRUE)
      #--------------------------------------
      # STEP 3: Analysis                     |
      #--------------------------------------
      # Step 3.1: Get diff methylated probes |
      #--------------------------------------
      Sig.probes <- get.diff.meth(
        data       = mae,
        group.col  = group.col,
        group1     = group1,
        group2     = group2,
        sig.dif    = 0.3,
        minSubgroupFrac = 1,
        cores      = cores,
        dir.out    = dir.out,
        diff.dir   = j,
        pvalue     = 0.01
      )
      if(nrow(Sig.probes) == 0) next
      #-------------------------------------------------------------
      # Step 3.2: Identify significant probe-gene pairs            |
      #-------------------------------------------------------------
      # Collect nearby 20 genes for Sig.probes
      nearGenes <- GetNearGenes(
        data  = mae,
        probe = Sig.probes$probe
      )
      
      pair <- get.pair(
        data       = mae,
        nearGenes  = nearGenes,
        group.col  = group.col,
        group1     = group1,
        group2     = group2,
        permu.dir  = paste0(dir.out,"/permu"),
        dir.out    = dir.out,
        mode       = "supervised", 
        diff.dir   = j,
        cores      = cores,
        label      = j,
        permu.size = 10000,
        raw.pvalue = 0.001
      )
      
      Sig.probes.paired <- readr::read_csv(
        paste0(dir.out,
               "/getPair.",j,
               ".pairs.significant.csv")
      )[,1, drop = TRUE]
      
      
      #-------------------------------------------------------------
      # Step 3.3: Motif enrichment analysis on the selected probes |
      #-------------------------------------------------------------
      if(length(Sig.probes.paired) > 0 ){
        #-------------------------------------------------------------
        # Step 3.3: Motif enrichment analysis on the selected probes |
        #-------------------------------------------------------------
        enriched.motif <- get.enriched.motif(
          probes  = Sig.probes.paired,
          dir.out = dir.out,
          data    = mae,
          label   = j,
          plot.title =  paste0("BRCA: OR for paired probes ",
                               j, "methylated in ",
                               group1, " vs ",group2)
        )
        motif.enrichment <- readr::read_csv(
          paste0(dir.out,
                 "/getMotif.",j,
                 ".motif.enrichment.csv")
        )
        if(length(enriched.motif) > 0){
          #-------------------------------------------------------------
          # Step 3.4: Identifying regulatory TFs                        |
          #-------------------------------------------------------------
          print("get.TFs")
          
          TF <- get.TFs(
            data           = mae,
            enriched.motif = enriched.motif,
            dir.out        = dir.out,
            mode           = "supervised",
            group.col      = group.col,
            group1         = group1,
            diff.dir       = j,
            group2         = group2,
            cores          = cores,
            label          = j
          )
          TF.meth.cor <- get(
            load(paste0(dir.out, "/getTF.",j, ".TFs.with.motif.pvalue.rda"))
          )
          save(
            mae, TF, enriched.motif, Sig.probes.paired,
            pair, nearGenes, Sig.probes, motif.enrichment,
            TF.meth.cor,
            file = paste0(dir.out,"/ELMER_results_",j,".rda")
          )
        }
      }
    }, error = function(e){
      message(e)
    })
  }
}