I have an issue with WGCNA analysis on RNA-seq data. I have an error message when calculating co-expression similarity and adjacency at the stage of network construction and module detection. So when I input the function:
adjacency = adjacency(datExpr, power = softPower)
the following errror appears: Error in cor(datExpr, use = "p") : REAL() can only be applied to a 'numeric', not a 'integer'
The is my first time at using WGCNA, any advice would be appreciated. Thanks in advance. This is the code for R 3.3.2:
library("WGCNA")
options(stringsAsFactors = FALSE)
femData = read.csv("Counts ALL-2-VP.csv")
dim(femData)
names(femData)
datExpr0 = as.data.frame(t(femData[, -c(1:1)]))
names(datExpr0) = femData$EnsemblID
rownames(datExpr0) = names(femData)[-c(1:1)]
View(datExpr0)
View(femData)
gsg = goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK
if (!gsg$allOK) {if (sum(!gsg$goodGenes)>0) printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", "))); if (sum(!gsg$goodSamples)>0) printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", "))); datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes] } sampleTree = hclust(dist(datExpr0), method = "average")
sizeGrWindow(12,9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
datExpr = datExpr0
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
save(datExpr, file = "FemaleLiver-01-dataInput.RData")
lnames = load(file = "FemaleLiver-01-dataInput.RData")
lnames
disableWGCNAThreads()
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
sizeGrWindow(9, 5)
par(mfrow = c(1,2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])sft$fitIndices[,2], labels=powers,cex=cex1,col="red")
abline(h=0.78,col="red")
plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
softPower = 12
adjacency = adjacency(datExpr, power = softPower)
The sessionInfo:
sessionInfo function (package = NULL) { z <- list()
z$R.version <- R.Version()
z$platform <- z$R.version$platform
if (nzchar(.Platform$r_arch))
z$platform <- paste(z$platform, .Platform$r_arch, sep = "/")
z$platform <- paste0(z$platform, " (", 8 * .Machine$sizeof.pointer,
"-bit)")
z$locale <- Sys.getlocale()
if (.Platform$OS.type == "windows") {
z$running <- win.version()
}
else if (nzchar(Sys.which("uname"))) {
uname <- system("uname -a", intern = TRUE)
os <- sub(" .*", "", uname)
z$running <- switch(os, Linux = if (file.exists("/etc/os-release")) {
tmp <- readLines("/etc/os-release")
t2 <- if (any(startsWith(tmp, "PRETTY_NAME="))) sub("^PRETTY_NAME=",
"", grep("^PRETTY_NAME=", tmp, value = TRUE)[1L]) else if (any(startsWith(tmp,
"NAME"))) sub("^NAME=", "", grep("^NAME=", tmp,
value = TRUE)[1L]) else "Linux (unknown distro)"
sub("\"(.*)\"", "\\1", t2)
} else if (file.exists("/etc/system-release")) {
readLines("/etc/system-release")
}, Darwin = {
ver <- readLines("/System/Library/CoreServices/SystemVersion.plist")
ind <- grep("ProductUserVisibleVersion", ver)
ver <- ver[ind + 1L]
ver <- sub(".*<string>", "", ver)
ver <- sub("</string>$", "", ver)
ver1 <- strsplit(ver, ".", fixed = TRUE)[[1L]][2L]
sprintf("%s %s %s", ifelse(as.numeric(ver1) < 12,
"OS X", "macOS"), switch(ver1, `4` = "Tiger",
`5` = "Leopard", `6` = "Snow Leopard", `7` = "Lion",
`8` = "Mountain Lion", `9` = "Mavericks", `10` = "Yosemite",
`11` = "El Capitan", `12` = "Sierra", ""), ver)
}, SunOS = {
ver <- system("uname -r", intern = TRUE)
paste("Solaris", strsplit(ver, ".", fixed = TRUE)[[1L]][2L])
}, uname)
}
if (is.null(package)) {
package <- grep("^package:", search(), value = TRUE)
keep <- vapply(package, function(x) x == "package:base" ||
!is.null(attr(as.environment(x), "path")), NA)
package <- .rmpkg(package[keep])
}
pkgDesc <- lapply(package, packageDescription, encoding = NA)
if (length(package) == 0)
stop("no valid packages were specified")
basePkgs <- sapply(pkgDesc, function(x) !is.null(x$Priority) &&
x$Priority == "base")
z$basePkgs <- package[basePkgs]
if (any(!basePkgs)) {
z$otherPkgs <- pkgDesc[!basePkgs]
names(z$otherPkgs) <- package[!basePkgs]
}
loadedOnly <- loadedNamespaces()
loadedOnly <- loadedOnly[!(loadedOnly %in% package)]
if (length(loadedOnly)) {
names(loadedOnly) <- loadedOnly
pkgDesc <- c(pkgDesc, lapply(loadedOnly, packageDescription))
z$loadedOnly <- pkgDesc[loadedOnly]
}
class(z) <- "sessionInfo"
z
} <bytecode: 0x000000001acd2c88=""> <environment: namespace:utils="">
The error message suggests that some internal function doesn't like to work with integers, most likely you're working with count data. See point 4 of the WGCNA FAQ.
You could have a look if datExpr is the type you expect it is by using e.g.
str(datExpr)
Are you using raw counts or normalized? Can you summarize your workflow to generate "Counts ALL-2-VP.csv"?
Hi, did you already solve the problem?. I have the same problem right now. regards
As already mentioned, read the WGCNA FAQ, in particular the section dealing with RNA seq data. If that doesn't help then post your own question with enough relevant details so that people can help you.