Hello everyone,
I am doing the gene co-expression network using WGCNA, and want to export the network to Cytoscape so that I can see modules in Cytoscape.
I only have gene expression data, fpkm, and NO trait data. I used following commands. It works well except the last step exportNetworkToCytoscape I got errors at the exportNetworkToCytoscape step, as following:
"TOM = TOMsimilarityFromExpr(datExpr0, power = 6); modules = c("brown", "red"); probes = names(datExpr0) inModule = is.finite(match(moduleColors, modules)); modProbes = probes[inModule]; modTOM = TOM[inModule, inModule]; dimnames(modTOM) = list(modProbes, modProbes); cyt = exportNetworkToCytoscape( modTOM, edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""), nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""), weighted = TRUE, threshold = 0.02, nodeNames = modProbes, nodeAttr = moduleColors[inModule]);"
'Error in exportNetworkToCytoscape(modTOM, edgeFile = paste("CytoscapeInput-edges-", : Cannot determine node names: nodeNames is NULL and adjMat has no dimnames.'
Thanks for any helps.
Best wishes.
Chen
My full commands are as following:
-----------------------------------------------Part 1---------------------------------------
1 Data input, cleaning and pre-processing
1.a Loading expression data
library(WGCNA) library(flashClust) enableWGCNAThreads() getwd(); workingDir = "C:/cygwin64/home/FemaleLiver-Data"; # The working dir you want setwd(workingDir); getwd(); options(stringsAsFactors = FALSE); original_pfkm = read.csv("cly_3.txt", header=TRUE, sep="\t"); # cly_3.txt LiverFemale3600.csv dim(original_pfkm); names(original_pfkm); datExpr0 = as.data.frame(t(original_pfkm[, -c(1)])); # remove the first column names(datExpr0) = original_pfkm$substanceBXH; rownames(datExpr0) = names(original_pfkm)[-c(1)];
******* The name 'datExpr0' will be used in following many times.
1.b Chekcing for excessiving missing values and identification of outlier microarray samples
Check for genes and samples with too many missing values
gsg = goodSamplesGenes(datExpr0, verbose = 3); gsg$allOK
If the last command return TRUE, all genes passed the cuts. Otherwise, removing the offending genes and samples using following commands
if (!gsg$allOK) {
Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0) printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", "))); if (sum(!gsg$goodSamples)>0) printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
Remove the offending genes and samples from the data:
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes] }
sampleTree = hclust(dist(datExpr0), method = "average");
Plot the sample tree: Open a graphic output window of size 12 by 9 inches
The user should change the dimensions if the window is too large or too small.
sizeGrWindow(12,9) pdf(file = "C:/cygwin64/home/FemaleLiver-Data/Plots/sampleClustering.pdf", width = 12, height = 9); # You can save the pdf par(cex = 0.6); par(mar = c(0,4,2,0)) plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
<h6>**Remove the sampling with any obvirous outliers manually from the input data</h6>Now datExpr0 can be used for gene co-expression analysis
Save data for next step
save(datExpr0, file = "pfkm-dataInput.RData")
-------------------------------------------------Part 2------------------------------------------
-----------
2.b Step by step network constructioin and module detection
2.b.1 Choosing the soft-thresholding power: analysis of network topology
powers = c(c(1:10), seq(from = 12, to=20, by=2))
Call the network topology analysis function
sft = pickSoftThreshold(datExpr0, powerVector = powers, verbose = 5)
Plot the results:
sizeGrWindow(9, 5) par(mfrow = c(1,2)); cex1 = 0.9;
Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])sft$fitIndices[,2], labels=powers,cex=cex1,col="red");
this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
2.b.2 Co-expression similarity and adjacency
softPower = 6; # calculate using thresholding power 6 adjacency = adjacency(datExpr0, power = softPower);
2.b.3 Topological Overlap Matrix (TOM)
Trun adjacency to topological overlap
TOM = TOMsimilarity(adjacency); dissTOM = 1-TOM
2.b.4 Clustering using TOM
Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average");
Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9) plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", labels = FALSE, hang = 0.04);
We like large modules, so we set the minimum module size relatively high:
minModuleSize = 30;
Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize); table(dynamicMods) # Give the module information
Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods) table(dynamicColors)
Plot the dendrogram and colors underneath
sizeGrWindow(8,6) plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors")
2.b.5 Merging of modules whose expression profiles are very similarity
Calculate eigengenes
MEList = moduleEigengenes(datExpr0, colors = dynamicColors) MEs = MEList$eigengenes
Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
Plot the result
sizeGrWindow(7, 6) plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")
Merge the modules
MEDissThres = 0.25 # choose height cut of 0.25, corresponding to correlation of 0.75, to merge
Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
Call an automatic merging function
merge = mergeCloseModules(datExpr0, dynamicColors, cutHeight = MEDissThres, verbose = 3)
The merged module colors
mergedColors = merge$colors;
Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;
Compare the original and merged module
sizeGrWindow(12, 9)
pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()
<h6>################### Will get pdf about the comparison</h6>Use the merged module colors in mergeColors, and save relevant variables for use in subsequent analyses
Rename to moduleColors
moduleColors = mergedColors
Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50)); moduleLabels = match(moduleColors, colorOrder)-1; MEs = mergedMEs;
Save module colors and labels for use in subsequent parts
save(MEs, moduleLabels, moduleColors, geneTree, file = "pfkm-2-networkConstruction-stepByStep.RData")
---------------------------------------------Part 5-----------------------------------------
5 Network visualization using WGCNA functions
loading data
lnames = load(file = "pfkm-dataInput.RData"); lnames = load(file = "pfkm-networkConstruction-stepByStep.RData"); lnames nGenes = ncol(datExpr0) nSamples = nrow(datExpr0)
5.a Visualizing the gene network
Calculate topological overlap anew: this could be done more efficiently by saving the TOM
calculated during module detection, but let us do it again here.
dissTOM = 1-TOMsimilarityFromExpr(datExpr0, power = 6);
Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
Call the plot function
sizeGrWindow(9,9) TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
Only select some of the genes for heatmap plot
nSelect = 100
For reproducibility, we set the random seed
set.seed(10); select = sample(nGenes, size = nSelect); selectTOM = dissTOM[select, select];
There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average") selectColors = moduleColors[select];
Open a graphical window
sizeGrWindow(9,9)
Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
the color palette; setting the diagonal to NA also improves the clarity of the plot
plotDiss = selectTOM^7; diag(plotDiss) = NA; TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
5.b Visualizing the network of eigengenes
Recalculate module eigengenes
MEs = moduleEigengenes(datExpr0, moduleColors)$eigengenes MET = orderMEs(cbind(MEs))
Plot the dendrogram
sizeGrWindow(6,6); par(cex = 1.0) plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0), plotHeatmaps = FALSE)
Plot the heatmap matrix of denderogram
par(cex = 1.0) plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90)
-------------------------------------------Part 6---------------------------------------------
Exporting a gene network to external visualization software such as Cytospcae
Loading data
lnames = load(file = "pfkm-dataInput.RData");
The variable lnames contains the names of loaded variables.
lnames
Load network data saved in the second part.
lnames = load(file = "pfkm-2-networkConstruction-stepByStep.RData"); lnames
6.b export to Cytoscape
Recalculate topological overlap if needed
TOM = TOMsimilarityFromExpr(datExpr0, power = 6);
modules = c("brown", "red");
Select module probes
probes = names(datExpr0) inModule = is.finite(match(moduleColors, modules)); modProbes = probes[inModule];
Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule]; dimnames(modTOM) = list(modProbes, modProbes);
Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape( modTOM, edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""), nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""), weighted = TRUE, threshold = 0.02, nodeNames = modProbes, nodeAttr = moduleColors[inModule]);
Thanks, I replaced the "modProbes = probes[inModule]" using "modProbes = names(probes)[inModule]". However, it still get the same error.
First, if you want to reply my answer use the "ADD COMMENT", below it (or below the comment).
Did you check that probes has really names? see
head(probes)
I tried the head(probes). It got NULL
Is it possible that problem is in 'probes = names(datExpr0)'?
Thanks very much
So your nodeNames are NULL. Probably, but ask your supervisor or a college.
OK. Thanks for suggestions!!