Entering edit mode
2.5 years ago
biology_inform
▴
50
Hi all, I am stuck at the first step of WGCNA. I have dataset with 12 samples. When I draw the
library(WGCNA)
library(tidyverse)
library(magrittr)
library(tibble)
getwd()
mydataDirect <- ".."
setwd(mydataDirect)
GSE121634_HCC827_counts <- read_table("GSE121634_HCC827_counts.txt",
col_types = cols(Chr = col_skip(), Start = col_skip(),
End = col_skip(), Strand = col_skip(),
Length = col_skip()), skip = 1)
GSE121634_HCC827_counts <- data.frame(GSE121634_HCC827_counts, row.names = 1)
cpm_wOutfilter_rowname <- as.data.frame(apply(GSE121634_HCC827_counts, 2,function(x) (x/sum(x))*10^6)) #Calculating CPM values of counts
cpm_wOutfilter_rowname <- tibble::rownames_to_column(cpm_wOutfilter_rowname, "Geneid")
datExpr1 = as.data.frame(t(cpm_wOutfilter_rowname[]));
names(datExpr1) = cpm_wOutfilter_rowname$Geneid;
rownames(datExpr1) = names(cpm_wOutfilter_rowname)[];
datExpr1 = datExpr1[-1,]
datExpr1 <- data.frame(lapply(datExpr1,as.numeric))
rownames(datExpr1) <- colnames(GSE121634_HCC827_counts)
gsg = goodSamplesGenes(datExpr1, verbose = 3);
gsg$allOK
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr1)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr1)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr1 = datExpr1[gsg$goodSamples, gsg$goodGenes]
}
sampleTree = hclust(dist(datExpr1), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
sizeGrWindow(12,9)
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
# Plot a line to show the cut
abline(h = 15000, col = "red");
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 10000, minSize = 10000)
table(clust)
# clust 1 contains the samples we want to keep.
keepSamples = (clust==1)
datExpr = datExpr1[keepSamples, ]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
The resulting graph's (shown below), y axis is comprised of 10000 15000 20000 and so on. But in the tutorial and examples, y axis was different.
Also when I am trying to do clustering
clust = cutreeStatic(sampleTree, cutHeight = 10000, minSize = 10000)
It gives me clust as 0 0 0 0 0 0 0 0 0 0 0 0, so there is nothing to cluster
Different data > different values. That's it
Because
minSize = 10000
, Please, read whatminSize
does: link