Entering edit mode
12 months ago
hellokwmin
•
0
The code for WGCNA analysis is like this:
vsd = varianceStabilizingTransformation(dds.filter)
wpn_vsd = getVarianceStabilizedData(dds.filter)
rv_wpn = rowVars(wpn_vsd)
summary(rv_wpn)
q75_wpn <- quantile( rowVars(wpn_vsd), .75) # <= original
q95_wpn <- quantile( rowVars(wpn_vsd), .95)
expr_normalized <- wpn_vsd[ rv_wpn > q95_wpn, ]
expr_normalized[1:5,1:10]
dim(expr_normalized)
expr_normalized_df <- data.frame(expr_normalized) %>%
mutate(
Gene_id = row.names(expr_normalized)
) %>%
pivot_longer(-Gene_id)
expr_normalized_df %>% ggplot(., aes(x = name, y = value)) +
geom_violin() +
geom_point() +
theme_bw() +
theme(
axis.text.x = element_text( angle = 90)
) +
ylim(0, NA) +
labs(
title = "Normalized and 95 quantile Expression",
x = "treatment",
y = "normalized expression"
)
### WGCNA
input_mat = t(expr_normalized)
allowWGCNAThreads()
powers = c(c(1:10), seq(from = 12, to = 50, by = 2))
# Call the network topology analysis function
sft = pickSoftThreshold(
input_mat, # <= Input data
#blockSize = 30,
powerVector = powers,
verbose = 5
)
par(mfrow = c(1,2));
cex1 = 0.9;
plot(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)",
ylab = "Scale Free Topology Model Fit, signed R^2",
main = paste("Scale independence")
)
text(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
labels = powers, cex = cex1, col = "red"
)
abline(h = 0.90, col = "red")
plot(sft$fitIndices[, 1],
sft$fitIndices[, 5],
xlab = "Soft Threshold (power)",
ylab = "Mean Connectivity",
type = "n",
main = paste("Mean connectivity")
)
text(sft$fitIndices[, 1],
sft$fitIndices[, 5],
labels = powers,
cex = cex1, col = "red")
picked_power = 30
temp_cor <- cor
cor <- WGCNA::cor # Force it to use WGCNA cor function (fix a namespace conflict issue)
netwk <- blockwiseModules(input_mat, # <= input here
# == Adjacency Function ==
power = picked_power, # <= power here
networkType = "signed",
# == Tree and Block Options ==
deepSplit = 2,
pamRespectsDendro = F,
# detectCutHeight = 0.75,
minModuleSize = 30,
maxBlockSize = 11000,
# == Module Adjustments ==
reassignThreshold = 0,
mergeCutHeight = 0.25,
# == TOM == Archive the run results in TOM file (saves time)
saveTOMs = T,
saveTOMFileBase = "ER",
# == Output Options
numericLabels = T,
verbose = 3)
cor <- temp_cor # Return cor function to original namespace
module_eigengenes = netwk$MEs
##### print out a preview
head(module_eigengenes)
## get number of genes for each module
table(netwk$colors)
# Convert labels to colors for plotting
mergedColors = labels2colors(netwk$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(
netwk$dendrograms[[1]],
mergedColors[netwk$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05 )
netwk$colors[netwk$blockGenes[[1]]]
table(netwk$colors)
module_df <- data.frame(
gene_id = names(netwk$colors),
colors = labels2colors(netwk$colors)
)
module_df[1:5,]
write_delim(module_df,
file = "gene_modules_antarctica_OTC.txt",
delim = "\t")
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(input_mat, mergedColors)$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
# Add treatment names
MEs0$treatment = row.names(MEs0)
# tidy & plot data
mME = MEs0 %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order)
)
mME_plot <- mME %>%
ggplot(aes(x = treatment, y = name, fill = value, label = sprintf("%.2f", value))) +
geom_tile() +
geom_text(size = 3, color = "black", show.legend = FALSE) +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1, 1)
) +
theme(axis.text.x = element_text(angle = 90)) +
labs(
title = "Module-trait Relationships",
x = "Treatments",
y = "Modules",
fill = "Correlation"
)
After running last code, the result is like this:
And, my question is can I put any p-value as numeric or aesterick? Many tutorial for WGCNA calculated p-values based on trait data which they are interested. But, I am dealing with RNA-seq data wherein only treatment names and corresponding Gene and Gene expression data. I cannot calculate any p-values using my data?
The treatment is a sample trait. But do you really not have any additional metadata about your samples?