Weighted correlation network analysis enables analysis of co-expression networks. We used WGCNA
to detect co-expressed milRNAs in Hordeum vulgare and Blumeria graminis f.sp. hordei.
Loading required packages.
require(WGCNA)
require(tidyverse)
Importing data.
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# import data
TPM_Data = read.table("tpm_Bgh.csv", sep = ",");
feature_table <- read.csv("design.txt", sep = "\t")
Parse data.
# convert column to rownames
TPM_Data <- column_to_rownames(TPM_Data, var = "V1")
# assign feature table to column names
names(TPM_Data) <- feature_table$sampleid
# transpose table
TPM_Data <- t(TPM_Data)
Clustering samples and removing outliers.
# Plot the sample tree
sampleTree = hclust(dist(TPM_Data), method = "average")
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 15000, minSize = 10)
table(clust)
# clust 0 contains the samples we want to keep.
keepSamples = (clust==0)
datExpr = TPM_Data[keepSamples, ]
Thresholding clusters.
### data
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
### convey feature_table to datTraits
datTraits <- feature_table
datTraits <- column_to_rownames(datTraits, var = "sampleid")
str(datTraits)
## 'data.frame': 18 obs. of 6 variables:
## $ EPI: int 1 1 1 0 0 0 0 0 0 0 ...
## $ HAU: int 0 0 0 1 1 1 0 0 0 0 ...
## $ P40: int 0 0 0 0 0 0 1 1 1 0 ...
## $ EVN: int 0 0 0 0 0 0 0 0 0 1 ...
## $ EVY: int 0 0 0 0 0 0 0 0 0 0 ...
## $ MYC: int 0 0 0 0 0 0 0 0 0 0 ...
# Re-cluster samples
### dist() will calculate the PCC number among rows of matrix
sampleTree2 = hclust(dist(datExpr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
### verbose simply defines how "talky" a function is so how much status messages it prints to screen.
### This has no effect on performance or output.
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
Clustering data.
sft$powerEstimate
## [1] NA
net = blockwiseModules(datExpr, power = 12, ### need to be changed
TOMType = "signed", minModuleSize = 30, ### need to be changed
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "barleyTOMfilter",
verbose = 3)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]]
# Define numbers of genes and samples
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
#
# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.1,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
Creating output tables.
# Define variable weight containing the weight column of datTrait
time_points_EPI = as.data.frame(datTraits$`EPI`)
names(time_points_EPI) = "EPI"
# names (colors) of the modules
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"))
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
#
names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
# Gene trait significance calculation
geneTraitSignificance = as.data.frame(cor(datExpr, time_points_EPI, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
#
names(geneTraitSignificance) = paste("GS.", names(time_points_EPI), sep="");
names(GSPvalue) = paste("p.GS.", names(time_points_EPI), sep="");
head(geneTraitSignificance)
## GS.EPI
## BghMilRNA_00001 0.29556495
## BghMilRNA_00002 0.04548864
## BghMilRNA_00003 -0.09952972
## BghMilRNA_00004 0.30044379
## BghMilRNA_00005 -0.10846523
## BghMilRNA_00006 -0.01677757
# print results
module = "cyan"
column = match(module, modNames)
moduleGenes = moduleColors==module
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
Calculate topological overlap again, refine networks.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 16)
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA
# Plot TOM
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#
# For reproducibility, we set the random seed
set.seed(10)
select = sample(nGenes, size = nSelect)
selectTOM = dissTOM[select, select]
# There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select]
# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7
diag(plotDiss) = NA
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
# Recalculate module eigengenes
# MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes
# Isolate weight from the clinical traits
# weight = as.data.frame(datTraits$weight_g);
# names(weight) = "weight"
# Add the weight to existing module eigengenes
MET = orderMEs(cbind(MEs, time_points_EPI))
# Plot the relationships among the eigengenes and the trait
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
= 90)
# Plot the dendrogram
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
plotHeatmaps = FALSE)
# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
plotDendrograms = FALSE, xLabelsAngle = 90)
# Recalculate topological overlap if needed
TOM = TOMsimilarityFromExpr(datExpr, power = 16)
# Select modules
modules = c("lightyellow")
# Select module probes
inModule = is.finite(match(moduleColors, modules))
modProbes = colnames(datExpr)[inModule]
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule]
dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape(modTOM,
edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,
threshold = 0.5,
nodeNames = modProbes,
nodeAttr = moduleColors[inModule])
# Save results
write_csv(cyt$edgeData, "Cytoscape_file_myc_lightyellow.csv")