Skip to content

Latest commit

 

History

History
231 lines (225 loc) · 9.74 KB

06.Co-expression network analysis by WGCNA.md

File metadata and controls

231 lines (225 loc) · 9.74 KB

WGCNA co-expression network analysis

Weighted correlation network analysis enables analysis of co-expression networks. We used WGCNA to detect co-expressed milRNAs in Hordeum vulgare and Blumeria graminis f.sp. hordei.

Loading required packages.

require(WGCNA)
require(tidyverse)

Importing data.

# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# import data
TPM_Data = read.table("tpm_Bgh.csv", sep = ",");
feature_table <- read.csv("design.txt", sep = "\t")

Parse data.

# convert column to rownames 
TPM_Data <- column_to_rownames(TPM_Data, var = "V1")
# assign feature table to column names 
names(TPM_Data) <- feature_table$sampleid
# transpose table
TPM_Data <- t(TPM_Data)

Clustering samples and removing outliers.

# Plot the sample tree
sampleTree = hclust(dist(TPM_Data), method = "average")
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, 
     cex.axis = 1.5, cex.main = 2)
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 15000, minSize = 10)
table(clust)
# clust 0 contains the samples we want to keep.
keepSamples = (clust==0) 
datExpr = TPM_Data[keepSamples, ]

Thresholding clusters.

### data
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
### convey feature_table to datTraits
datTraits <- feature_table
datTraits <- column_to_rownames(datTraits, var = "sampleid")
str(datTraits)
## 'data.frame':    18 obs. of  6 variables:
##  $ EPI: int  1 1 1 0 0 0 0 0 0 0 ...
##  $ HAU: int  0 0 0 1 1 1 0 0 0 0 ...
##  $ P40: int  0 0 0 0 0 0 1 1 1 0 ...
##  $ EVN: int  0 0 0 0 0 0 0 0 0 1 ...
##  $ EVY: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ MYC: int  0 0 0 0 0 0 0 0 0 0 ...
# Re-cluster samples
### dist() will calculate the PCC number among rows of matrix 
sampleTree2 = hclust(dist(datExpr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits), 
                    main = "Sample dendrogram and trait heatmap")
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
### verbose simply defines how "talky" a function is so how much status messages it prints to screen.
### This has no effect on performance or output.
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red")
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

Clustering data.

sft$powerEstimate
## [1] NA
net = blockwiseModules(datExpr, power = 12, ### need to be changed 
                       TOMType = "signed", minModuleSize = 30, ### need to be changed 
                       reassignThreshold = 0, mergeCutHeight = 0.25,
                       numericLabels = TRUE, pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = "barleyTOMfilter", 
                       verbose = 3)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]]
# Define numbers of genes and samples
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
#
# Will display correlations and their p-values
textMatrix =  paste(signif(moduleTraitCor, 2), "\n(",
                    signif(moduleTraitPvalue, 1), ")", sep = "")
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(datTraits),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = greenWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.1,
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))

plot

Creating output tables.

# Define variable weight containing the weight column of datTrait
time_points_EPI = as.data.frame(datTraits$`EPI`)
names(time_points_EPI) = "EPI"
# names (colors) of the modules
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"))
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
#
names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
# Gene trait significance calculation
geneTraitSignificance = as.data.frame(cor(datExpr, time_points_EPI, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
#
names(geneTraitSignificance) = paste("GS.", names(time_points_EPI), sep="");
names(GSPvalue) = paste("p.GS.", names(time_points_EPI), sep="");

head(geneTraitSignificance)
##                      GS.EPI
## BghMilRNA_00001  0.29556495
## BghMilRNA_00002  0.04548864
## BghMilRNA_00003 -0.09952972
## BghMilRNA_00004  0.30044379
## BghMilRNA_00005 -0.10846523
## BghMilRNA_00006 -0.01677757

# print results
module = "cyan"
column = match(module, modNames)
moduleGenes = moduleColors==module

verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneTraitSignificance[moduleGenes, 1]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = "Gene significance for body weight",
                   main = paste("Module membership vs. gene significance\n"),
                   cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

Calculate topological overlap again, refine networks.

# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 16)
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA
# Plot TOM
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#
# For reproducibility, we set the random seed
set.seed(10)
select = sample(nGenes, size = nSelect)
selectTOM = dissTOM[select, select]
# There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select]
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7
diag(plotDiss) = NA
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
# Recalculate module eigengenes
# MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes
# Isolate weight from the clinical traits
# weight = as.data.frame(datTraits$weight_g);
# names(weight) = "weight"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, time_points_EPI))
# Plot the relationships among the eigengenes and the trait
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
                      = 90)
# Plot the dendrogram
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
                      plotHeatmaps = FALSE)
# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
                      plotDendrograms = FALSE, xLabelsAngle = 90)
# Recalculate topological overlap if needed
TOM = TOMsimilarityFromExpr(datExpr, power = 16)
# Select modules
modules = c("lightyellow")
# Select module probes
inModule = is.finite(match(moduleColors, modules))
modProbes = colnames(datExpr)[inModule]
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule]
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape(modTOM,
                               edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
                               nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
                               weighted = TRUE,
                               threshold = 0.5,
                               nodeNames = modProbes,
                               nodeAttr = moduleColors[inModule])
# Save results
write_csv(cyt$edgeData, "Cytoscape_file_myc_lightyellow.csv")