From 53b494f9d9655a6b64b9081f3b3024555599f2cb Mon Sep 17 00:00:00 2001 From: Eilis Hannon Date: Thu, 9 Jan 2025 20:57:22 +0000 Subject: [PATCH] remove redundant scripts --- array/DNAm/analysis/EWAS/crr.r | 107 ----- array/DNAm/analysis/EWAS/dmrcate.r | 151 ------- array/DNAm/analysis/EWAS/dmrff.r | 130 ------ array/DNAm/analysis/EWAS/lm.r | 92 ---- array/DNAm/analysis/EWAS/summariseDMPs.r | 508 ----------------------- 5 files changed, 988 deletions(-) delete mode 100755 array/DNAm/analysis/EWAS/crr.r delete mode 100644 array/DNAm/analysis/EWAS/dmrcate.r delete mode 100644 array/DNAm/analysis/EWAS/dmrff.r delete mode 100755 array/DNAm/analysis/EWAS/lm.r delete mode 100755 array/DNAm/analysis/EWAS/summariseDMPs.r diff --git a/array/DNAm/analysis/EWAS/crr.r b/array/DNAm/analysis/EWAS/crr.r deleted file mode 100755 index 08cfe336..00000000 --- a/array/DNAm/analysis/EWAS/crr.r +++ /dev/null @@ -1,107 +0,0 @@ -##---------------------------------------------------------------------# -## -## Title: EWAS with clustered robust regression model -## -## Purpose of script: perform DNA methylation association analysis of -## schizophrenia vs controls testing for main and cell-specific effects. -## simulataneously testing for cell type differences -## -## Author: Eilis Hannon -## -## Date Created: 2022-07-08 -## -##---------------------------------------------------------------------# - - -#----------------------------------------------------------------------# -# DEFINE ANALYSIS FUNCTION -#----------------------------------------------------------------------# - -runEWAS<-function(row,QCmetrics){ - - p.df <- pdata.frame(data.frame("meth" = row, "phenotype" = QCmetrics$Phenotype, "age" = QCmetrics$CCDNAmAge, "sex" = QCmetrics$Sex, "cell.type" = QCmetrics$Cell.type, "brain.bank" = QCmetrics$Tissue.Centre, "id" = QCmetrics$Indidivual.ID), index = c("id"), drop.index = F) - - modelP <- plm(meth ~ phenotype * cell.type + age + sex + brain.bank, data = p.df, model = "pooling") - nullP <- plm(meth ~ phenotype + cell.type + age + sex + brain.bank, data = p.df, model = "pooling") - nullCT<-plm(meth ~ phenotype + age + sex + brain.bank, data = p.df, model = "pooling") - - # compute Stata like df-adjustment - G <- length(unique(p.df$id)) - N <- length(p.df$id) - dfa <- (G/(G - 1)) * (N - 1)/modelP$df.residual - - # display with cluster VCE and df-adjustment - firm_c_vcov <- dfa * vcovHC(modelP, type = "HC0", cluster = "group", adjust = T) - model<-coeftest(modelP, vcov = firm_c_vcov) - - # extract case control main effect - c(model["phenotypeSchizophrenia",c(1,2,4)], - - # extract cell specific case control effect - waldtest(modelP,nullP, vcov = firm_c_vcov, test = "F")[2,4], - model["phenotypeSchizophrenia:cell.typeNeuN+",c(1,2,4)], - model["phenotypeSchizophrenia:cell.typeSox10+",c(1,2,4)], - - # extract cell type effect - waldtest(modelP, nullCT, vcov = firm_c_vcov, test = "F")[2,4], - model["cell.typeNeuN+",c(1,2,4)], - model["cell.typeSox10+",c(1,2,4)]) - -} - - - - -#----------------------------------------------------------------------# -# LOAD PACKAGES -#----------------------------------------------------------------------# - -library(plm) -library(lmtest) -library(doParallel) - -#----------------------------------------------------------------------# -# DEFINE PARAMETERS -#----------------------------------------------------------------------# - -args<-commandArgs(trailingOnly = TRUE) -dataDir <- args[1] - -normData<-file.path(dataDir, "/3_normalised/normalised.rdata") -resPath<-file.path(dataDir, "/4_analysis/EWAS") - -#----------------------------------------------------------------------# -# LOAD AND PREPARE DATA -#----------------------------------------------------------------------# - -setwd(dataDir) -load(normData) - -## remove total samples and cell types with less than 20 samples -QCmetrics<-QCmetrics[which(QCmetrics$Cell.type != "Total"),] -nSample<-table(QCmetrics$Cell.type) -QCmetrics<-QCmetrics[QCmetrics$Cell.type %in% names(nSample[which(nSample > 19)]),] - -# filter to schizophrenia and control only -QCmetrics<-QCmetrics[QCmetrics$Phenotype %in% c("Schizophrenia", "Control"),] - -celltypeNormbeta<-celltypeNormbeta[,QCmetrics$Basename] -cellTypes<-unique(QCmetrics$Cell.type) - -#----------------------------------------------------------------------# -# INTITATE PARALLEL ENV -#----------------------------------------------------------------------# - -nCores<-detectCores() -cl <- makeCluster(nCores-1) -registerDoParallel(cl) -clusterExport(cl, list("runEWAS", "pdata.frame", "plm", "vcovHC", "coeftest", "waldtest")) - -outtab<-matrix(data = parRapply(cl, celltypeNormbeta, runEWAS, QCmetrics), ncol = 17, byrow = TRUE) - - -rownames(outtab)<-rownames(celltypeNormbeta) -colnames(outtab)<-c("SCZ_coeff", "SCZ_SE", "SCZ_P", "CellType_SCZ_P", "NeuN_SCZ_coeff", "NeuN_SCZ_SE","NeuN_SCZ_P", "SOX10_SCZ_coeff", "SOX10_SCZ_SE","SOX10_SCZ_P", "CellType_P","NeuN_coeff", "NeuN_SE", "NeuN_P", "SOX10_coeff", "SOX10_SE", "SOX10_P") - - -save(outtab, file = file.path(resPath, "RobustClusteredErrors.rdata")) \ No newline at end of file diff --git a/array/DNAm/analysis/EWAS/dmrcate.r b/array/DNAm/analysis/EWAS/dmrcate.r deleted file mode 100644 index ef948f0e..00000000 --- a/array/DNAm/analysis/EWAS/dmrcate.r +++ /dev/null @@ -1,151 +0,0 @@ -#---------------------------------------------------------------------# -## -## Title: Regional analysis for case control within each cell type -## -## Purpose of script: identify regions of differential DNA methylation for -## schizophrenia vs controls for each cell type separately using DMRcate -## -## Note this script uses a custom version of DMRplot that can't be added to -## this repository due to license restrictions -##---------------------------------------------------------------------# - -#----------------------------------------------------------------------# -# LOAD PACKAGES -#----------------------------------------------------------------------# - -library(DMRcate) - -#----------------------------------------------------------------------# -# DEFINE FUNCTIONS -#----------------------------------------------------------------------# - -gg_color_hue <- function(n) { - hues = seq(15, 375, length = n + 1) - hcl(h = hues, l = 65, c = 100)[1:n] -} - -#----------------------------------------------------------------------# -# DEFINE PARAMETERS -#----------------------------------------------------------------------# - -args<-commandArgs(trailingOnly = TRUE) -dataDir <- args[1] -cellType <- args[2] -refPath <- args[3] - -normData<-file.path(dataDir, "3_normalised/normalised.rdata") -cellCompData<-file.path(dataDir, "4_analysis", "EstimatedNeuralCellComposition.rdata") -resPath<-file.path(dataDir, "4_analysis/EWAS") - -#----------------------------------------------------------------------# -# LOAD AND PREPARE DATA -#----------------------------------------------------------------------# - -setwd(dataDir) -load(normData) -load(cellCompData) - -print(paste0("running DMRcate on ", cellType, " cell type...")) -## subset to cell type samples -QCmetrics<-QCmetrics[which(QCmetrics$Cell.type == cellType),] - -# filter to schizophrenia and control only -QCmetrics<-QCmetrics[QCmetrics$Phenotype %in% c("Schizophrenia", "Control"),] - - -# add cell composition proportions & CETYGO scores - -if (cellType == "Double-"){ - QCmetrics$Cell.Proportion <- predPropBest[QCmetrics$Basename, "NeuNNeg_Sox10Neg_IRF8Pos"] -} else if (cellType == "NeuN+"){ - QCmetrics$Cell.Proportion <- predPropBest[QCmetrics$Basename, "NeuNPos_SOX6Pos"] -} - -QCmetrics$CETYGO <- predPropBest[QCmetrics$Basename, "CETYGO"] - -cols<-c("Basename", "Phenotype", "Cell.Proportion", "CETYGO", "CCDNAmAge", "Sex", "Tissue.Centre") -cols <- cols[cols %in% colnames(QCmetrics)] -QCmetrics<-na.omit(QCmetrics[, cols]) - -# subset beta matrix to analysis samples -celltypeNormbeta<-celltypeNormbeta[,QCmetrics$Basename] - -#----------------------------------------------------------------------# -# REMOVE CROSS HYB & SNP PROBES -#----------------------------------------------------------------------# - -crosshyb<-read.table(file.path(refPath, "CrossHydridisingProbes_McCartney.txt"), stringsAsFactors = FALSE) -tofilter<-read.csv(file.path(refPath, "EPICArrayProbesToFilter.csv"), stringsAsFactors = FALSE) -snpProbes<-read.table(file.path(refPath, "SNPProbes_McCartney.txt"), stringsAsFactors = FALSE, header = TRUE) -crosshyb2<-read.csv(file.path(refPath, "Pidsley_SM1.csv"), stringsAsFactors = FALSE) -snpProbes2<-read.csv(file.path(refPath, "Pidsley_SM4.csv"), stringsAsFactors = FALSE) -snpProbes3<-read.csv(file.path(refPath, "Pidsley_SM5.csv"), stringsAsFactors = FALSE) -snpProbes4<-read.csv(file.path(refPath, "Pidsley_SM6.csv"), stringsAsFactors = FALSE) - -snpProbes<-snpProbes[which(snpProbes$DIST_FROM_MAPINFO < 10 & snpProbes$AF > 0.01),] -snpProbes2<-snpProbes2[which(snpProbes2$AF > 0.01),] -snpProbes3<-snpProbes3[which(snpProbes3$AF > 0.01),] -snpProbes4<-snpProbes4[which(snpProbes4$AF > 0.01),] - -dist<-cbind(abs(snpProbes4$VARIANT_END - snpProbes4$MAPINFO), abs(snpProbes4$VARIANT_START - snpProbes4$MAPINFO)) -dist<-apply(dist, 1, min) -snpProbes4<-snpProbes4[which(dist <=10),] - -remove<-unique(c(tofilter$IlmnID, crosshyb[,1], snpProbes$IlmnID, snpProbes2$PROBE, snpProbes3$PROBE, snpProbes4$PROBE)) -remove<-intersect(remove, rownames(celltypeNormbeta)) - -celltypeNormbeta <- celltypeNormbeta[-match(remove, rownames(celltypeNormbeta)),] - - -#----------------------------------------------------------------------# -# DEFINE ANALYSIS -#----------------------------------------------------------------------# - -if("Cell.Proportion" %in% cols){ - design <- model.matrix(~ Phenotype + Cell.Proportion + CETYGO + CCDNAmAge + Sex + Tissue.Centre, QCmetrics) -} else { - design <- model.matrix(~ Phenotype + CETYGO + CCDNAmAge + Sex + Tissue.Centre, QCmetrics) -} -siteAnnotation <- cpg.annotate("array", celltypeNormbeta, what = "B", arraytype = "EPIC", -analysis.type = "differential", design = design, coef = 2) - -# compare to our LM results -load(file.path(resPath, paste0(cellType, "LM.rdata"))) -outtab<-outtab[names(siteAnnotation@ranges),] - -pdf(file.path(resPath, "Plots", paste0("ScatterplotLMDMRCate", cellType, ".pdf")), width = 10, height = 5) -par(mfrow = c(1,2)) -plot(-log10(siteAnnotation@ranges$ind.fdr), -log10(outtab[,"FullModel_SCZ_P"]), - xlab = "DMRcate -log10P", ylab = "LM -log10P", pch = 16, - col = gg_color_hue(2)[as.factor(siteAnnotation@ranges$is.sig)]) -plot(siteAnnotation@ranges$diff, outtab[,"FullModel_SCZ_coeff"], - xlab = "DMRcate mean diff", ylab = "LM Mean diff", pch = 16, - col = gg_color_hue(2)[as.factor(siteAnnotation@ranges$is.sig)]) -abline(v = 0) -abline(h = 0) -dev.off() - -dmrcoutput <- dmrcate(siteAnnotation, lambda=1000, C=2) -results.ranges <- extractRanges(dmrcoutput, genome = "hg19") - -cols<-as.factor(QCmetrics$Phenotype) -names(cols)<-QCmetrics$Basename - -pdf(file.path(resPath, "Plots", paste0("DMRcate_DMRs_", cellType, ".pdf"))) -for(i in 1:length(results.ranges)){ -DMR.plot (ranges = results.ranges, - dmr = i, - CpGs = celltypeNormbeta, - phen.col = cols, - genome = "hg19", - labels = names(ranges), - flank = 50, - boxplot.flank = min(c(5, round(width(results.ranges)[i]*0.05))), - extra.ranges = NULL) - -} -dev.off() - -save(dmrcoutput, file = file.path(resPath, paste0(cellType,"DMRCate.rdata"))) -write.csv(results.ranges, file.path(resPath, "Tables", paste0("DMRcate_DMRs_", cellType, ".csv"))) - diff --git a/array/DNAm/analysis/EWAS/dmrff.r b/array/DNAm/analysis/EWAS/dmrff.r deleted file mode 100644 index 7dd8d7e1..00000000 --- a/array/DNAm/analysis/EWAS/dmrff.r +++ /dev/null @@ -1,130 +0,0 @@ -# ---------------------------------------------------------------------# -## -## Title: Regional analysis for case control within each cell type -## -## Purpose of script: identify regions of differential DNA methylation for -## schizophrenia vs controls for each cell type separately using dmrff -## -## ---------------------------------------------------------------------# - -#----------------------------------------------------------------------# -# LOAD PACKAGES -#----------------------------------------------------------------------# - -library(dmrff) - -#----------------------------------------------------------------------# -# DEFINE PARAMETERS -#----------------------------------------------------------------------# - -args <- commandArgs(trailingOnly = TRUE) -dataDir <- args[1] -cellType <- args[2] -refPath <- args[3] - -maxgap <- 1000 -p.cutoff <- 0.05 - -normData<-file.path(dataDir, "3_normalised/normalised.rdata") -resPath <- file.path(dataDir, "4_analysis/EWAS") - - -#----------------------------------------------------------------------# -# LOAD AND PREPARE DATA -#----------------------------------------------------------------------# - -load(normData) - -print(paste0("running dmrff on ", cellType, " cell type...")) -## subset to cell type samples -QCmetrics<-QCmetrics[which(QCmetrics$Cell.type == cellType),] - -# filter to schizophrenia and control only -QCmetrics<-QCmetrics[QCmetrics$Phenotype %in% c("Schizophrenia", "Control"),] - -# subset beta matrix to analysis samples -celltypeNormbeta<-celltypeNormbeta[,QCmetrics$Basename] - -load(file.path(resPath, paste0(cellType, "LM.rdata"))) - - -#----------------------------------------------------------------------# -# REMOVE CROSS HYB & SNP PROBES -#----------------------------------------------------------------------# - -crosshyb <- read.table(file.path(refPath, "CrossHydridisingProbes_McCartney.txt"), stringsAsFactors = FALSE) -tofilter <- read.csv(file.path(refPath, "EPICArrayProbesToFilter.csv"), stringsAsFactors = FALSE) -snpProbes <- read.table(file.path(refPath, "SNPProbes_McCartney.txt"), stringsAsFactors = FALSE, header = TRUE) -crosshyb2 <- read.csv(file.path(refPath, "Pidsley_SM1.csv"), stringsAsFactors = FALSE) -snpProbes2 <- read.csv(file.path(refPath, "Pidsley_SM4.csv"), stringsAsFactors = FALSE) -snpProbes3 <- read.csv(file.path(refPath, "Pidsley_SM5.csv"), stringsAsFactors = FALSE) -snpProbes4 <- read.csv(file.path(refPath, "Pidsley_SM6.csv"), stringsAsFactors = FALSE) - -snpProbes <- snpProbes[which(snpProbes$DIST_FROM_MAPINFO < 10 & snpProbes$AF > 0.01), ] -snpProbes2 <- snpProbes2[which(snpProbes2$AF > 0.01), ] -snpProbes3 <- snpProbes3[which(snpProbes3$AF > 0.01), ] -snpProbes4 <- snpProbes4[which(snpProbes4$AF > 0.01), ] - -dist <- cbind(abs(snpProbes4$VARIANT_END - snpProbes4$MAPINFO), abs(snpProbes4$VARIANT_START - snpProbes4$MAPINFO)) -dist <- apply(dist, 1, min) -snpProbes4 <- snpProbes4[which(dist <= 10), ] - -remove <- unique(c(tofilter$IlmnID, crosshyb[, 1], snpProbes$IlmnID, snpProbes2$PROBE, snpProbes3$PROBE, snpProbes4$PROBE)) -remove <- intersect(remove, rownames(outtab)) - -outtab <- outtab[-match(remove, rownames(outtab)), ] - - -# add gene annotation -probeAnnot <- read.table(file.path(refPath, "EPIC.anno.GRCh38.tsv"), sep = "\t", header = TRUE, stringsAsFactors = FALSE) -probeAnnot <- probeAnnot[match(rownames(outtab), probeAnnot$probeID), ] -probeAnnot$chrm <- gsub("chr", "", probeAnnot$chrm) -probeAnnot$start <- probeAnnot$start+1 - -outtab<- cbind(outtab, probeAnnot[, c("chrm", "start", "GeneNames", "GeneClasses")]) -outtab<- outtab[which(outtab[,"chrm"] != "Y"),] -outtab<- outtab[which(outtab[,"chrm"] != "*"),] - - -celltypeNormbeta<-celltypeNormbeta[rownames(outtab),] - - -#----------------------------------------------------------------------# -# RUN DMR ANALYSIS -#----------------------------------------------------------------------# - -## find candidate regions -candidates <- dmrff.candidates( - estimate=outtab$FullModel_SCZ_coeff, - p.value=outtab$FullModel_SCZ_P, - chr=outtab$chrm, - pos=outtab$start, - maxgap=maxgap, - p.cutoff=p.cutoff, - verbose=T) - -write.csv(candidates, file = file.path(resPath, "Tables", paste0(cellType,"CollapsedRegions_0.05.csv"))) - -candidates <- dmrff.candidates( - estimate=outtab$FullModel_SCZ_coeff, - p.value=outtab$FullModel_SCZ_P, - chr=outtab$chrm, - pos=outtab$start, - maxgap=maxgap, - p.cutoff=1e-6, - verbose=T) - -write.csv(candidates, file = file.path(resPath, "Tables", paste0(cellType,"CollapsedRegions_1e-6.csv"))) - - -dmrs <- dmrff(estimate = outtab$FullModel_SCZ_coeff, - se=outtab$FullModel_SCZ_SE, - p.value = outtab$FullModel_SCZ_P, - methylation = celltypeNormbeta, - chr = outtab$chrm, - pos = outtab$start, - maxgap = maxgap, - verbose = T, - p.cutoff = p.cutoff) - -write.csv(dmrs, file = file.path(resPath, "Tables", paste0(cellType,"dmrff_0.05.csv"))) \ No newline at end of file diff --git a/array/DNAm/analysis/EWAS/lm.r b/array/DNAm/analysis/EWAS/lm.r deleted file mode 100755 index e7ec96a4..00000000 --- a/array/DNAm/analysis/EWAS/lm.r +++ /dev/null @@ -1,92 +0,0 @@ -##---------------------------------------------------------------------# -## -## Title: EWAS with linear regression model -## -## Purpose of script: perform DNA methylation association analysis of -## schizophrenia vs controls testing for main and cell-specific effects. -## simulataneously testing for cell type differences -## -## Author: Eilis Hannon -## -## Date Created: 2022-07-08 -## -##---------------------------------------------------------------------# - - -#----------------------------------------------------------------------# -# DEFINE ANALYSIS FUNCTION -#----------------------------------------------------------------------# - -runEWAS<-function(row,QCmetrics){ - - modelLM<-lm(row ~ QCmetrics$Phenotype * QCmetrics$Cell.type + QCmetrics$CCDNAmAge + QCmetrics$Sex + QCmetrics$Tissue.Centre) - nullLM<-lm(row ~ QCmetrics$Phenotype + QCmetrics$Cell.type + QCmetrics$CCDNAmAge + QCmetrics$Sex + QCmetrics$Tissue.Centre) - nullCT<-lm(row ~ QCmetrics$Phenotype + QCmetrics$CCDNAmAge + QCmetrics$Sex + QCmetrics$Tissue.Centre) - - # extract case control main effect - return(c(summary(modelLM)$coefficients["QCmetrics$PhenotypeSchizophrenia",c(1,2,4)], - - # extract cell specific case control effect - anova(modelLM,nullLM)[2,6], - summary(modelLM)$coefficients["QCmetrics$PhenotypeSchizophrenia:QCmetrics$Cell.typeNeuN+",c(1,2,4)], - summary(modelLM)$coefficients["QCmetrics$PhenotypeSchizophrenia:QCmetrics$Cell.typeSox10+",c(1,2,4)], - - # extract cell type effect - anova(nullLM, nullCT)[2,6], - summary(nullLM)$coefficients["QCmetrics$Cell.typeNeuN+",c(1,2,4)], - summary(nullLM)$coefficients["QCmetrics$Cell.typeSox10+",c(1,2,4)])) -} - - - -#----------------------------------------------------------------------# -# LOAD PACKAGES -#----------------------------------------------------------------------# - -library(doParallel) - -#----------------------------------------------------------------------# -# DEFINE PARAMETERS -#----------------------------------------------------------------------# - -args<-commandArgs(trailingOnly = TRUE) -dataDir <- args[1] - -normData<-file.path(dataDir, "/3_normalised/normalised.rdata") -resPath<-file.path(dataDir, "/4_analysis/EWAS") - -#----------------------------------------------------------------------# -# LOAD AND PREPARE DATA -#----------------------------------------------------------------------# - -setwd(dataDir) -load(normData) - -## remove total samples and cell types with less than 20 samples -QCmetrics<-QCmetrics[which(QCmetrics$Cell.type != "Total"),] -nSample<-table(QCmetrics$Cell.type) -QCmetrics<-QCmetrics[QCmetrics$Cell.type %in% names(nSample[which(nSample > 19)]),] - -# filter to schizophrenia and control only -QCmetrics<-QCmetrics[QCmetrics$Phenotype %in% c("Schizophrenia", "Control"),] - -celltypeNormbeta<-celltypeNormbeta[,QCmetrics$Basename] -cellTypes<-unique(QCmetrics$Cell.type) - -#----------------------------------------------------------------------# -# INTITATE PARALLEL ENV -#----------------------------------------------------------------------# - -nCores<-detectCores() -cl <- makeCluster(nCores-1) -registerDoParallel(cl) -clusterExport(cl, list("runEWAS")) - -outtab<-matrix(data = parRapply(cl, celltypeNormbeta, runEWAS, QCmetrics), ncol = 17, byrow = TRUE) - - -rownames(outtab)<-rownames(celltypeNormbeta) -colnames(outtab)<-c("SCZ_coeff", "SCZ_SE", "SCZ_P", "CellType_SCZ_P", "NeuN_SCZ_coeff", "NeuN_SCZ_SE","NeuN_SCZ_P", "SOX10_SCZ_coeff", "SOX10_SCZ_SE","SOX10_SCZ_P", "CellType_P","NeuN_coeff", "NeuN_SE", "NeuN_P", "SOX10_coeff", "SOX10_SE", "SOX10_P") - - -save(outtab, file = file.path(resPath, "LM.rdata")) \ No newline at end of file diff --git a/array/DNAm/analysis/EWAS/summariseDMPs.r b/array/DNAm/analysis/EWAS/summariseDMPs.r deleted file mode 100755 index 8e79460e..00000000 --- a/array/DNAm/analysis/EWAS/summariseDMPs.r +++ /dev/null @@ -1,508 +0,0 @@ -##---------------------------------------------------------------------# -## -## Title: Classify & Compare EWAS Results -## -## Purpose of script: Characterise EWAS results and produce summary plots -## -##---------------------------------------------------------------------# - - -#----------------------------------------------------------------------# -# DEFINE CHARACTERISATION FUNCTIONS -#----------------------------------------------------------------------# - -calcCTDiffs<-function(res){ - # use output of interaction model to calculate mean case control difference for each cell type - res<-cbind(res, res[,"SCZ_coeff"], - res[,"SCZ_coeff"]+res[,"NeuN_SCZ_coeff"], - res[,"SCZ_coeff"]+res[,"SOX10_SCZ_coeff"]) - colnames(res)[(ncol(res)-2):ncol(res)]<-c("DNeg_Mean_Diff", "NEUN_Mean_Diff", "SOX10_Mean_Diff") - return(res) -} - -calcMaxCTDiff<-function(res){ - res<-cbind(res, apply(abs(as.matrix(res[,c("DNeg_Mean_Diff", "NEUN_Mean_Diff", "SOX10_Mean_Diff")])), 1, max)) - colnames(res)[ncol(res)]<-"CTMaxDiff" - return(res) -} - - -classifyDMPs<-function(res, pThres1 = 5e-5, pThres2 = 0.05){ - # identify DMPs for each cell type - # effect in dneg if SCZ_P signif - dneg_dmp <- res[,"SCZ_P"] < pThres1 - # effect in neun if SCZ_P signif and neuN_SCZ_P not signif or if SCZ_P not signif and NeuN_SCZ_P signif - neun_dmp <- res[,"SCZ_P"] < pThres1 & res[,"NeuN_SCZ_P"] > pThres2 | res[,"SCZ_P"] > pThres2 & res[,"NeuN_SCZ_P"] < pThres1 - # effect in sox10 if SCZ_P signif and SOX10_SCZ_P not signif or if SCZ_P not signif and SOX10_SCZ_P signif - sox10_dmp <- res[,"SCZ_P"] < pThres1 & res[,"SOX10_SCZ_P"] > pThres2 | res[,"SCZ_P"] > pThres2 & res[,"SOX10_SCZ_P"] < pThres1 - - res<-cbind(res, dneg_dmp, neun_dmp, sox10_dmp) - colnames(res)[(ncol(res)-2):ncol(res)]<-c("DNeg_DMP", "NEUN_DMP", "SOX10_DMP") - return(res) -} - - -#----------------------------------------------------------------------# -# LOAD PACKAGES -#----------------------------------------------------------------------# - -library(qqman) -library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19) -library(ggplot2) - -#----------------------------------------------------------------------# -# DEFINE PARAMETERS -#----------------------------------------------------------------------# - -args<-commandArgs(trailingOnly = TRUE) -dataDir <- args[1] -refPath <- args[2] - -normData<-file.path(dataDir, "/3_normalised/normalised.rdata") -resPath<-file.path(dataDir, "/4_analysis/EWAS") - -pos <- position_dodge(0.9) - -#----------------------------------------------------------------------# -# LOAD AND PREPARE DATA -#----------------------------------------------------------------------# - -setwd(dataDir) -load(normData) - -## remove total samples and cell types with less than 20 samples -QCmetrics<-QCmetrics[which(QCmetrics$Cell.type != "Total"),] -nSample<-table(QCmetrics$Cell.type) -QCmetrics<-QCmetrics[QCmetrics$Cell.type %in% names(nSample[which(nSample > 19)]),] - -# filter to schizophrenia and control only -QCmetrics<-QCmetrics[QCmetrics$Phenotype %in% c("Schizophrenia", "Control"),] - -celltypeNormbeta<-celltypeNormbeta[,QCmetrics$Basename] -cellTypes<-unique(QCmetrics$Cell.type) - -# load results -load(file.path(resPath, "LM.rdata")) -res.lm<-outtab -load(file.path(resPath, "MLM.rdata")) -res.mlm<-outtab -load(file.path(resPath, "RobustClusteredErrors.rdata")) -res.rce<-outtab - -load("Analysis/CellType/ANOVABrainCellTypes.rdata") - -#----------------------------------------------------------------------# -# REMOVE CROSS HYB & SNP PROBES -#----------------------------------------------------------------------# - -## filter out cross hyb & SNP probes -crosshyb<-read.table(file.path(refPath, "CrossHydridisingProbes_McCartney.txt"), stringsAsFactors = FALSE) -tofilter<-read.csv(file.path(refPath, "EPICArrayProbesToFilter.csv"), stringsAsFactors = FALSE) -snpProbes<-read.table(file.path(refPath, "SNPProbes_McCartney.txt"), stringsAsFactors = FALSE, header = TRUE) -crosshyb2<-read.csv(file.path(refPath, "Pidsley_SM1.csv"), stringsAsFactors = FALSE) -snpProbes2<-read.csv(file.path(refPath, "Pidsley_SM4.csv"), stringsAsFactors = FALSE) -snpProbes3<-read.csv(file.path(refPath, "Pidsley_SM5.csv"), stringsAsFactors = FALSE) -snpProbes4<-read.csv(file.path(refPath, "Pidsley_SM6.csv"), stringsAsFactors = FALSE) - -snpProbes<-snpProbes[which(snpProbes$DIST_FROM_MAPINFO < 10 & snpProbes$AF > 0.01),] -snpProbes2<-snpProbes2[which(snpProbes2$AF > 0.01),] -snpProbes3<-snpProbes3[which(snpProbes3$AF > 0.01),] -snpProbes4<-snpProbes4[which(snpProbes4$AF > 0.01),] - -dist<-cbind(abs(snpProbes4$VARIANT_END - snpProbes4$MAPINFO), abs(snpProbes4$VARIANT_START - snpProbes4$MAPINFO)) -dist<-apply(dist, 1, min) -snpProbes4<-snpProbes4[which(dist <=10),] - -remove<-unique(c(tofilter$IlmnID, crosshyb[,1], snpProbes$IlmnID, snpProbes2$PROBE, snpProbes3$PROBE, snpProbes4$PROBE)) -remove<-intersect(remove, rownames(res.lm)) - -if(length(remove) > 0){ - res.lm<-res.lm[-match(remove, rownames(res.lm)),] - res.mlm<-res.mlm[-match(remove, rownames(res.mlm)),] - res.rce<-res.rce[-match(remove, rownames(res.rce)),] -} - -#----------------------------------------------------------------------# -# ANNOTATE RESULTS -#----------------------------------------------------------------------# - -# add columns with cell type mean differences -res.lm<-calcCTDiffs(res.lm) -res.mlm<-calcCTDiffs(res.mlm) -res.rce<-calcCTDiffs(res.rce) - -res.lm<-calcMaxCTDiff(res.lm) -res.mlm<-calcMaxCTDiff(res.mlm) -res.rce<-calcMaxCTDiff(res.rce) - -# add columns to indicate cell type dmps -res.lm<-classifyDMPs(res.lm) -res.mlm<-classifyDMPs(res.mlm) -res.rce<-classifyDMPs(res.rce) - -# add gene annotation -annoObj <- minfi::getAnnotationObject("IlluminaHumanMethylationEPICanno.ilm10b4.hg19") -all <- minfi:::.availableAnnotation(annoObj)$defaults -newfData <- do.call(cbind, lapply(all, function(wh) { - minfi:::.annoGet(wh, envir = annoObj@data) -})) -newfData <- newfData[rownames(res.lm), ] - -res.lm<-cbind(res.lm, newfData[,c("chr", "pos", "UCSC_RefGene_Name", "UCSC_RefGene_Group", "Regulatory_Feature_Group", "Regulatory_Feature_Name", "Relation_to_Island")]) -res.mlm<-cbind(res.mlm, newfData[,c("chr", "pos", "UCSC_RefGene_Name", "UCSC_RefGene_Group", "Regulatory_Feature_Group", "Regulatory_Feature_Name", "Relation_to_Island")]) -res.rce<-cbind(res.rce, newfData[,c("chr", "pos", "UCSC_RefGene_Name", "UCSC_RefGene_Group", "Regulatory_Feature_Group", "Regulatory_Feature_Name", "Relation_to_Island")]) - -#----------------------------------------------------------------------# -# WRITE TABLES -#----------------------------------------------------------------------# - -dmps.lm<-res.lm[rowSums(as.matrix(res.lm[,c("DNeg_DMP", "NEUN_DMP", "SOX10_DMP")])) > 0,] -dmps.mlm<-res.mlm[rowSums(as.matrix(res.mlm[,c("DNeg_DMP", "NEUN_DMP", "SOX10_DMP")])) > 0,] -dmps.rce<-res.rce[rowSums(as.matrix(res.rce[,c("DNeg_DMP", "NEUN_DMP", "SOX10_DMP")])) > 0,] - -write.csv(dmps.lm, file.path(resPath, "Tables", "DiscoveryDMPsAllCellTypesLM.csv")) -write.csv(dmps.mlm, file.path(resPath, "Tables", "DiscoveryDMPsAllCellTypesMLM.csv")) -write.csv(dmps.rce, file.path(resPath, "Tables", "DiscoveryDMPsAllCellTypesCRR.csv")) - -#----------------------------------------------------------------------# -# SUMMARY PLOTS -#----------------------------------------------------------------------# - -tiff(file.path(resPath, "Plots","QQplotsMainSCZEffect.tiff"), width = 1200, height = 400) -par(mfrow = c(1,3)) -qq(res.lm[,"SCZ_P"]) -qq(res.mlm[,"SCZ_P"]) -qq(res.rce[,"SCZ_P"]) -dev.off() - -tiff(file.path(resPath, "Plots","QQplotsSCZCTIntEffect.tiff"), width = 1200, height = 400) -par(mfrow = c(1,3)) -qq(res.lm[,"CellType_SCZ_P"]) -qq(res.mlm[,"CellType_SCZ_P"]) -qq(res.rce[,"CellType_SCZ_P"]) -dev.off() - -tiff(file.path(resPath, "Plots","QQplotsSCZNEUNCTIntEffect.tiff"), width = 1200, height = 400) -par(mfrow = c(1,3)) -qq(res.lm[,"NeuN_SCZ_P"]) -qq(res.mlm[,"NeuN_SCZ_P"]) -qq(res.rce[,"NeuN_SCZ_P"]) -dev.off() - -tiff(file.path(resPath, "Plots","QQplotsSCZSOX10CTIntEffect.tiff"), width = 1200, height = 400) -par(mfrow = c(1,3)) -qq(res.lm[,"SOX10_SCZ_P"]) -qq(res.mlm[,"SOX10_SCZ_P"]) -qq(res.rce[,"SOX10_SCZ_P"]) -dev.off() - -### compare P-values across models -tiff(file.path(resPath, "Plots","Scatterlotlog10SCZMainEffectP.tiff"), width = 1200, height = 400) -par(mfrow = c(1,3)) -plot(-log10(res.lm[,"SCZ_P"]), -log10(res.mlm[,"SCZ_P"]), pch = 16, xlab = "OLS", ylab = "Mixed effects", cex = 1.1) -abline(a = 0, b = 1) -plot(-log10(res.lm[,"SCZ_P"]), -log10(res.rce[,"SCZ_P"]), pch = 16, xlab = "OLS", ylab = "Robust clustered", cex = 1.1) -abline(a = 0, b = 1) -plot(-log10(res.mlm[,"SCZ_P"]), -log10(res.rce[,"SCZ_P"]), pch = 16, xlab = "Mixed effects", ylab = "Robust clustered", cex = 1.1) -abline(a = 0, b = 1) -dev.off() - -## manhattan plots -res.lm[,"chr"][which(res.lm[,"chr"] == "chrX")]<-"chr23" -res.lm[,"chr"][which(res.lm[,"chr"] == "chrY")]<-"chr24" -res.lm[,"chr"]<-gsub("chr", "", res.lm[,"chr"]) -res.lm[,"chr"]<-as.numeric(as.character(res.lm[,"chr"])) - -res<-data.frame("SNP" = rownames(res.lm), "P" = res.lm[,"SCZ_P"], "CHR" = res.lm[,"chr"],"BP" = res.lm[,"pos"]) -res<-na.omit(res) -jpeg(file.path(resPath, "Plots", "ManhattanPlotsLM.jpeg"), width = 600, height = 600, quality = 100) -par(mfrow = c(3,1)) -par(mar = c(4,4,0.5,0.5)) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) - -res<-data.frame("SNP" = rownames(res.lm), "P" = res.lm[,"NeuN_SCZ_P"], "CHR" = res.lm[,"chr"],"BP" = res.lm[,"pos"]) -res<-na.omit(res) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) -res<-data.frame("SNP" = rownames(res.lm), "P" = res.lm[,"SOX10_SCZ_P"], "CHR" = res.lm[,"chr"],"BP" = res.lm[,"pos"]) -res<-na.omit(res) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) -dev.off() - -res.mlm[,"chr"][which(res.mlm[,"chr"] == "chrX")]<-"chr23" -res.mlm[,"chr"][which(res.mlm[,"chr"] == "chrY")]<-"chr24" -res.mlm[,"chr"]<-gsub("chr", "", res.mlm[,"chr"]) -res.mlm[,"chr"]<-as.numeric(as.character(res.mlm[,"chr"])) - -res<-data.frame("SNP" = rownames(res.mlm), "P" = res.mlm[,"SCZ_P"], "CHR" = res.mlm[,"chr"],"BP" = res.mlm[,"pos"]) -res<-na.omit(res) -jpeg(file.path(resPath, "Plots", "ManhattanPlotsMLM.jpeg"), width = 600, height = 600, quality = 100) -par(mfrow = c(3,1)) -par(mar = c(4,4,0.5,0.5)) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) - -res<-data.frame("SNP" = rownames(res.mlm), "P" = res.mlm[,"NeuN_SCZ_P"], "CHR" = res.mlm[,"chr"],"BP" = res.mlm[,"pos"]) -res<-na.omit(res) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) -res<-data.frame("SNP" = rownames(res.mlm), "P" = res.mlm[,"SOX10_SCZ_P"], "CHR" = res.mlm[,"chr"],"BP" = res.mlm[,"pos"]) -res<-na.omit(res) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) -dev.off() - -res.rce[,"chr"][which(res.rce[,"chr"] == "chrX")]<-"chr23" -res.rce[,"chr"][which(res.rce[,"chr"] == "chrY")]<-"chr24" -res.rce[,"chr"]<-gsub("chr", "", res.rce[,"chr"]) -res.rce[,"chr"]<-as.numeric(as.character(res.rce[,"chr"])) - -res<-data.frame("SNP" = rownames(res.rce), "P" = res.rce[,"SCZ_P"], "CHR" = res.rce[,"chr"],"BP" = res.rce[,"pos"]) -res<-na.omit(res) -jpeg(file.path(resPath, "Plots", "ManhattanPlotsCRR.jpeg"), width = 600, height = 600, quality = 100) -par(mfrow = c(3,1)) -par(mar = c(4,4,0.5,0.5)) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) - -res<-data.frame("SNP" = rownames(res.rce), "P" = res.rce[,"NeuN_SCZ_P"], "CHR" = res.rce[,"chr"],"BP" = res.rce[,"pos"]) -res<-na.omit(res) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) -res<-data.frame("SNP" = rownames(res.rce), "P" = res.rce[,"SOX10_SCZ_P"], "CHR" = res.rce[,"chr"],"BP" = res.rce[,"pos"]) -res<-na.omit(res) -manhattan(res, genomewideline = -log10(9e-8), suggestiveline = -log10(5e-5)) -dev.off() - - -## COMPARE DMPS ACROSS CELL TYPES -library(tidyr) -pdf(file.path(resPath, "Plots","ViolinPlotCelltypeEffectsDiscoveryDMPsLM.pdf"), width = 6, height = 6) -diffLong<-gather(data.frame(dmps.lm[,c("DNeg_Mean_Diff", "NEUN_Mean_Diff", "SOX10_Mean_Diff")])) -diffLong$key<-gsub("_Mean_Diff", "", diffLong$key) -diffLong$value<-abs(diffLong$value) -ggplot(diffLong, aes(x = key, y = value, fill = key)) + geom_violin() +xlab("Cell Type") + ylab("Mean difference") + ggtitle(paste("Across", nrow(dmps.lm), "Discovery DMPs")) + stat_summary(fun=mean, geom="point", size=2, color="black") -dev.off() - -pdf(file.path(resPath, "Plots","ViolinPlotCelltypeEffectsDiscoveryDMPsMLM.pdf"), width = 6, height = 6) -diffLong<-gather(data.frame(dmps.mlm[,c("DNeg_Mean_Diff", "NEUN_Mean_Diff", "SOX10_Mean_Diff")])) -diffLong$key<-gsub("_Mean_Diff", "", diffLong$key) -diffLong$value<-abs(diffLong$value) -ggplot(diffLong, aes(x = key, y = value, fill = key)) + geom_violin() +xlab("Cell Type") + ylab("Mean difference") + ggtitle(paste("Across", nrow(dmps.mlm), "Discovery DMPs")) + stat_summary(fun=mean, geom="point", size=2, color="black") -dev.off() - -pdf(file.path(resPath, "Plots","ViolinPlotCelltypeEffectsDiscoveryDMPsCRR.pdf"), width = 6, height = 6) -diffLong<-gather(data.frame(dmps.rce[,c("DNeg_Mean_Diff", "NEUN_Mean_Diff", "SOX10_Mean_Diff")])) -diffLong$key<-gsub("_Mean_Diff", "", diffLong$key) -diffLong$value<-abs(diffLong$value) -ggplot(diffLong, aes(x = key, y = value, fill = key)) + geom_violin() +xlab("Cell Type") + ylab("Mean difference") + ggtitle(paste("Across", nrow(dmps.rce), "Discovery DMPs")) + stat_summary(fun=mean, geom="point", size=2, color="black") -dev.off() - - -pdf(file.path(resPath, "Plots","CompareCelltypeEffectsDiscoveryDMPsLM.pdf"), width = 12, height = 4) -par(mfrow = c(1,3)) -par(mar = c(4.5,4.5,1,1)) -plot(dmps.lm[,"DNeg_Mean_Diff"], dmps.lm[,"NEUN_Mean_Diff"], pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff NeuN+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(dmps.lm[,"DNeg_Mean_Diff"], dmps.lm[,"SOX10_Mean_Diff"], pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(dmps.lm[,"NEUN_Mean_Diff"], dmps.lm[,"SOX10_Mean_Diff"], pch = 16, xlab = "Mean diff NeuN+ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) -dev.off() - -pdf(file.path(resPath, "Plots","CompareCelltypeEffectsDiscoveryDMPsMLM.pdf"), width = 12, height = 4) -par(mfrow = c(1,3)) -par(mar = c(4.5,4.5,1,1)) -plot(dmps.mlm[,"DNeg_Mean_Diff"], dmps.mlm[,"NEUN_Mean_Diff"], pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff NeuN+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(dmps.mlm[,"DNeg_Mean_Diff"], dmps.mlm[,"SOX10_Mean_Diff"], pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(dmps.mlm[,"NEUN_Mean_Diff"], dmps.mlm[,"SOX10_Mean_Diff"], pch = 16, xlab = "Mean diff NeuN+ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) -dev.off() - -pdf(file.path(resPath, "Plots","CompareCelltypeEffectsDiscoveryDMPsCRR.pdf"), width = 12, height = 4) -par(mfrow = c(1,3)) -par(mar = c(4.5,4.5,1,1)) -plot(dmps.rce[,"DNeg_Mean_Diff"], dmps.rce[,"NEUN_Mean_Diff"], pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff NeuN+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(dmps.rce[,"DNeg_Mean_Diff"], dmps.rce[,"SOX10_Mean_Diff"], pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(dmps.rce[,"NEUN_Mean_Diff"], dmps.rce[,"SOX10_Mean_Diff"], pch = 16, xlab = "Mean diff NeuN+ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) -dev.off() - -## PLOT DMPS - -library(ggplot2) -# Basic violin plot - - -pdf(file.path(resPath, "Plots", "ViolinPlotDiscoveryDMPsRCEModel.pdf"), width = 6, height = 5) -for(i in order(dmps.rce[,"CTMaxDiff"], decreasing = TRUE)){ - tmpDat<-data.frame("CellType" = QCmetrics$Cell.type, "Phenotype" = QCmetrics$Phenotype, "DNAm" = celltypeNormbeta[rownames(dmps.rce)[i],]) - p<-ggplot(tmpDat, aes(x=CellType, y=DNAm, fill = Phenotype)) + - geom_violin(position = pos, scale = 'width') + ggtitle(paste(rownames(dmps.rce)[i], dmps.rce$UCSC_RefGene_Name[i])) + - stat_summary(fun = "mean", - geom = "point", - position = pos) - print(p) -} -dev.off() - - -pdf(file.path(resPath, "Plots", "ViolinPlotDiscoveryDMPsMLMModel.pdf"), width = 6, height = 5) -for(i in order(dmps.mlm[,"CTMaxDiff"], decreasing = TRUE)){ - if(dmps.mlm[i,"CTMaxDiff"] > 0.04){ - tmpDat<-data.frame("CellType" = QCmetrics$Cell.type, "Phenotype" = QCmetrics$Phenotype, "DNAm" = celltypeNormbeta[rownames(dmps.mlm)[i],]) - p<-ggplot(tmpDat, aes(x=CellType, y=DNAm, fill = Phenotype)) + - geom_violin(position = pos, scale = 'width') + ggtitle(paste(rownames(dmps.mlm)[i], dmps.mlm$UCSC_RefGene_Name[i])) + - stat_summary(fun = "mean", - geom = "point", - position = pos) - print(p) - } -} -dev.off() - - -# characterise DMPs - -par(mfrow = c(1,3)) -plot(-log10(dmps.lm[,"SCZ_P"]),-log10(dmps.lm[,"NeuN_SCZ_P"]), pch = 16, xlab = "-log10 Main Effect P", ylab = "log10 NeuN * Interaction P") -plot(-log10(dmps.lm[,"SCZ_P"]),-log10(dmps.lm[,"CellType_P"]), pch = 16, xlab = "-log10 Main Effect P", ylab = "log10 Cell type P") - -hist(-log10(dmps.lm[which(res.lm[,"SCZ_P"] < 9e-8),"CellType_SCZ_P"]), xlab = "SCZ x Cell type P") -abline(v = -log10(0.05)) -hist(-log10(res.lm[which(res.lm[,"SCZ_P"] < 9e-8),"CellType_P"]), xlab = "Cell type P") -abline(v = -log10(9e-8), col = "red") - -par(mfrow = c(1,2)) -plot(-log10(res.mlm[,16]),-log10(res.mlm[,"CellType_P"]), pch = 16, xlab = "-log10 Main Effect P", ylab = "log10 Cell type P") -plot(-log10(res.mlm[,16]),-log10(res.mlm[,"CellType_SCZ_P"]), pch = 16, xlab = "-log10 Main Effect P", ylab = "log10 Interaction P") -hist(-log10(res.mlm[which(res.mlm[,16] < 9e-8),"CellType_P"]), xlab = "Cell type -logP", col = "gray") -abline(v = -log10(9e-8), col = "red") -hist(-log10(res.mlm[which(res.mlm[,16] < 9e-8),"CellType_SCZ_P"]), xlab = "SCZ x Cell type -logP", col = "gray") -abline(v = -log10(0.05)) - -par(mfrow = c(1,2)) -plot(-log10(res.rce[,"SCZ_P"]),-log10(res.rce[,"CellType_SCZ_P"]), pch = 16, xlab = "-log10 Main Effect P", ylab = "log10 Interaction P") -plot(-log10(res.rce[,"SCZ_P"]),-log10(res.rce[,"CellType_P"]), pch = 16, xlab = "-log10 Main Effect P", ylab = "log10 Cell type P") -hist(-log10(res.rce[order(res.rce[,"SCZ_P"])[1:25],"CellType_P"]), xlab = "Cell type P", col = "gray") -abline(v = -log10(9e-8), col = "red") -hist(-log10(res.rce[order(res.rce[,"SCZ_P"])[1:25],"CellType_SCZ_P"]), xlab = "SCZ x Cell type P", col = "gray") -abline(v = -log10(0.05)) - - -par(mfrow = c(1,3)) -par(mar = c(4.5,4.5,1,1)) -plot(dneg, neun, pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff NeuN+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(dneg, sox, pch = 16, xlab = "Mean diff Double-ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - -plot(neun, sox, pch = 16, xlab = "Mean diff NeuN+ve", ylab = "Mean diff Sox10+ve", cex = 1.5, cex.axis = 1.3, cex.lab = 1.3) -abline(v = 0) -abline(h = 0) -abline(a = 0, b = 1) - - - - - -## are any scz dmps sig diff between irf8 and dneg? - -ctOut<-ctOut[rownames(res.mlm),] - -par(mfrow = c(1,2)) -table(ctOut[which(res.mlm[,16] < 9e-8),6] < 9e-8) -plot(ctOut[which(res.mlm[,16] < 9e-8),5], res.mlm[which(res.mlm[,16] < 9e-8),14]*-1, pch = 16 -, col = as.factor(ctOut[which(res.mlm[,16] < 9e-8),6] < 9e-8), xlab = "IRF8vsDNeg", ylab = "SCZDiff") -abline(v = 0) -abline(h = 0) - -table(ctOut[order(res.rce[,"SCZ_P"])[1:25],6] < 9e-8) -plot(ctOut[order(res.rce[,"SCZ_P"])[1:25],5], res.rce[order(res.rce[,"SCZ_P"])[1:25],14]*-1, pch = 16, -col = as.factor(ctOut[order(res.rce[,"SCZ_P"])[1:25],6] < 9e-8), xlab = "IRF8vsDNeg", ylab = "SCZDiff") -abline(v = 0) -abline(h = 0) - -pdf("Analysis/Schizophrenia/CompareModels/BoxplotTop25DMPsRCEModel.pdf", width = 6, height = 5) -for(i in order(res.rce[,"SCZ_P"])[1:25]){ - boxplot(celltypenormbeta[rownames(res.rce)[i],] ~ pheno$Phenotype*pheno$Cell.type, names = rep(c("SCZ", "CON"), 3) - , xlab = "", ylab = "DNAm", main = "", col = c("#6699CC", "#FF6666")) - abline(v = seq(2.5, 5, 2), lty = 2) - mtext(side = 3, line = 0, adj = 0, "Double Negative") - mtext(side = 3, line = 0, adj = 0.5, "NeuN+ve") - mtext(side = 3, line = 0, adj = 1, "Sox10+ve") - mtext(side = 3, line = 2, paste(rownames(res.rce)[i], newfData$UCSC_RefGene_Name[i])) - mtext(side = 3, line = 1, adj = 1, paste("P =", signif(res.rce[i,14],3), "Int P =",signif(res.rce[i,"CellType_SCZ_P"],3))) -} -dev.off() - - -pdf("Analysis/Schizophrenia/CompareModels/BoxplotTop25DMPsMLMModel.pdf", width = 6, height = 5) -for(i in order(res.mlm[,16])[1:25]){ - boxplot(celltypenormbeta[rownames(res.mlm)[i],] ~ pheno$Phenotype*pheno$Cell.type, names = rep(c("SCZ", "CON"), 3) - , xlab = "", ylab = "DNAm", main = "", col = c("#6699CC", "#FF6666")) - abline(v = seq(2.5, 5, 2), lty = 2) - mtext(side = 3, line = 0, adj = 0, "Double Negative") - mtext(side = 3, line = 0, adj = 0.5, "NeuN+ve") - mtext(side = 3, line = 0, adj = 1, "Sox10+ve") - mtext(side = 3, line = 2, paste(rownames(res.mlm)[i], newfData$UCSC_RefGene_Name[i])) - mtext(side = 3, line = 1, adj = 1, paste("P =", signif(res.mlm[i,16],3), "Int P =",signif(res.mlm[i,"CellType_SCZ_P"],3))) -} -dev.off() - - -## replot excluding sites with significant difference between irf8 and double negative - -pdf("Analysis/Schizophrenia/CompareModels/BoxplotTop25DMPsRCEModel.pdf", width = 12, height = 5) -for(i in order(res.rce[,"SCZ_P"])[1:25]){ - boxplot(celltypenormbeta[rownames(res.rce)[i],] ~ pheno$Phenotype*pheno$Cell.type, names = rep(c("SCZ", "CON"), 3) - , xlab = "", ylab = "DNAm", main = "", col = c("#6699CC", "#FF6666")) - abline(v = seq(2.5, 5, 2), lty = 2) - mtext(side = 3, line = 0, adj = 0, "Double Negative") - mtext(side = 3, line = 0, adj = 0.5, "NeuN+ve") - mtext(side = 3, line = 0, adj = 1, "Sox10+ve") - mtext(side = 3, line = 2, paste(rownames(res.rce)[i], newfData$UCSC_RefGene_Name[i])) - mtext(side = 3, line = 1, adj = 1, paste("P =", signif(res.rce[i,16],3), "Int P =",signif(res.rce[i,"CellType_SCZ_P"],3))) -} -dev.off() - - -pdf("Analysis/Schizophrenia/CompareModels/BoxplotDMPsMLMNoDiffIRF8Model.pdf", width = 12, height = 5) -for(i in which(res.mlm[,16] < 9e-8 & ctOut[,6] > 0.05)){ - boxplot(celltypenormbeta[rownames(res.mlm)[i],] ~ pheno$Phenotype*pheno$Cell.type, names = rep(c("SCZ", "CON"), 3) - , xlab = "", ylab = "DNAm", main = "", col = c("#6699CC", "#FF6666")) - abline(v = seq(2.5, 5, 2), lty = 2) - mtext(side = 3, line = 0, adj = 0, "Double Negative") - mtext(side = 3, line = 0, adj = 0.5, "NeuN+ve") - mtext(side = 3, line = 0, adj = 1, "Sox10+ve") - mtext(side = 3, line = 2, paste(rownames(res.mlm)[i], newfData$UCSC_RefGene_Name[i])) - mtext(side = 3, line = 1, adj = 1, paste("P =", signif(res.mlm[i,16],3), "Int P =",signif(res.mlm[i,"CellType_SCZ_P"],3))) -} -dev.off() \ No newline at end of file