Also, I want to add a color key.

Question

HeatMap: how to cluster only the rows and keep order of the heatmap's column labels as same as in the df?

1

Entering edit mode

6.6 years ago

WUSCHEL ▴ 850

I want to plot a heatmap and cluster only the rows (i.e. the genes in tydf1). I also want to keep the heatmap's column labels in the same order as in the data frame (i.e. tydf1). How can I do this?

Sample data:

df1 <- structure(list(Gene = c("AA", "PQ", "XY", "UBQ"), X_T0_R1 = c(1.46559502, 0.220140568, 0.304127515, 1.098842127), X_T0_R2 = c(1.087642983, 0.237500819, 0.319844338, 1.256624804), X_T0_R3 = c(1.424945196, 0.21066267, 0.256496284, 1.467120048), X_T1_R1 = c(1.289943948, 0.207778662, 0.277942721, 1.238400358), X_T1_R2 = c(1.376535013, 0.488774258, 0.362562315, 0.671502431), X_T1_R3 = c(1.833390311, 0.182798731, 0.332856558, 1.448757569), X_T2_R1 = c(1.450753714, 0.247576125, 0.274415259, 1.035410946), X_T2_R2 = c(1.3094609, 0.390028842, 0.352460646, 0.946426593), X_T2_R3 = c(0.5953716, 1.007079177, 1.912258811, 0.827119776), X_T3_R1 = c(0.7906009, 0.730242116, 1.235644748, 0.832287694), X_T3_R2 = c(1.215333041, 1.012914813, 1.086362205, 1.00918082), X_T3_R3 = c(1.069312467, 0.780421013, 1.002313082, 1.031761442), Y_T0_R1 = c(0.053317766, 3.316414959, 3.617213894, 0.788193798), Y_T0_R2 = c(0.506623748, 3.599442788, 1.734075583, 1.179462912), Y_T0_R3 = c(0.713670106, 2.516735845, 1.236204882, 1.075393433), Y_T1_R1 = c(0.740998252, 1.444496448, 1.077023349, 0.869258744), Y_T1_R2 = c(0.648231834, 0.097957459, 0.791438659, 0.428805547), Y_T1_R3 = c(0.780499252, 0.187840968, 0.820430227, 0.51636582), Y_T2_R1 = c(0.35344654, 1.190274584, 0.401845911, 1.223534348), Y_T2_R2 = c(0.220223951, 1.367784148, 0.362815405, 1.102117612), Y_T2_R3 = c(0.432856978, 1.403057729, 0.10802472, 1.304233845), Y_T3_R1 = c(0.234963735, 1.232129062, 0.072433381, 1.203096462), Y_T3_R2 = c(0.353770497, 0.885122768, 0.011662112, 1.188149743), Y_T3_R3 = c(0.396091395, 1.333921747, 0.192594116, 1.838029829), Z_T0_R1 = c(0.398000559, 1.286528398, 0.129147097, 1.452769794), Z_T0_R2 = c(0.384759325, 1.122251177, 0.119475721, 1.385513609), Z_T0_R3 = c(1.582230097, 0.697419716, 2.406671502, 0.477415567), Z_T1_R1 = c(1.136843842, 0.804552001, 2.13213228, 0.989075996), Z_T1_R2 = c(1.275683837, 1.227821594, 0.31900326, 0.835941568), Z_T1_R3 = c(0.963349308, 0.968589683, 1.706670339, 0.807060135), 
Z_T2_R1 = c(3.765036263, 0.477443352, 1.712841882, 0.469173869), Z_T2_R2 = c(1.901023385, 0.832736132, 2.223429427, 0.593558769), Z_T2_R3 = c(1.407713024, 0.911920317, 2.011259223, 0.692553388), Z_T3_R1 = c(0.988333629, 1.095130142, 1.648598854, 0.629915612), Z_T3_R2 = c(0.618606729, 0.497458337, 0.549147265, 1.249492088), Z_T3_R3 = c(0.429823986, 0.471389536, 0.977124788, 1.136635484)), row.names = c(NA, -4L ), class = c("data.table", "data.frame"))

Scripts used:

# Packages: dplyr for the pipe and the grouped summary, stringr for
# str_split_fixed(), tidyr for gather()/spread().
library(dplyr)
library(stringr)
library(tidyr)
library(tidyverse)

# Reshape to long format: one row per gene and sample column.
gdf1 <- gather(df1, "group", "Expression", -Gene)

# Drop the replicate suffix from the column name, e.g. "X_T0_R1" -> "X_T0".
gdf1$tgroup <- apply(
    str_split_fixed(gdf1$group, "_", 3)[, c(1, 2)],
    1,
    paste,
    collapse = "_"
)

# Average the replicates per gene and group, then spread back to wide format
# (one column per group).
tydf1 <- gdf1 %>%
    group_by(Gene, tgroup) %>%
    summarize(expression_mean = mean(Expression)) %>%
    spread(., tgroup, expression_mean)

Heatmap script #1 that I am using:

library(gplots)
library(vegan)

# Move the first column (Gene) into the row names so that only the numeric
# expression columns remain.
tydf1 <- tydf1 %>%
    as.data.frame() %>%
    column_to_rownames(var = colnames(tydf1)[1])

randup.m <- as.matrix(tydf1)

# 30-step red -> yellow -> dark green palette.
scaleRYG <- colorRampPalette(
    c("red", "yellow", "darkgreen"),
    space = "rgb"
)(30)

# Cluster the rows (genes) up front and hand the tree to heatmap.2().
data.dist <- vegdist(randup.m, method = "euclidean")
row.clus <- hclust(data.dist, "average")

heatmap.2(
    randup.m,
    Rowv = as.dendrogram(row.clus),
    dendrogram = "row",
    col = scaleRYG,
    margins = c(7, 10),
    density.info = "none",
    trace = "none",
    lhei = c(2, 6),
    colsep = 1:3,
    sepcolor = "black",
    sepwidth = c(0.001, 0.0001),
    xlab = "Identifier",
    ylab = "Rows"
)

Heatmap script #2 that I am using:

# Drop the first column before converting to a matrix, then plot with the
# base heatmap() function.
# NOTE(review): this assumes tydf1 still has Gene as its first column; if
# Gene was already moved into the row names, [, -1] drops a data column.
df2 <- as.matrix(tydf1[, -1])
heatmap(df2)

Also, I want to add a color key.

R gene genome • 7.0k views

ADD COMMENT • link 6.6 years ago by WUSCHEL ▴ 850

0

Entering edit mode

The function column_to_rownames comes from the textshape library, which is not loaded in your code. The following line throws an error: tydf1 <- tydf1 %>% as.data.frame() %>% column_to_rownames(var=colnames(tydf1)[1]). It should be tydf1 <- tydf1 %>% as.data.frame() %>% column_to_rownames(colnames(tydf1)[1]).

ADD REPLY • link 6.6 years ago by cpad0112 21k

0

Entering edit mode

Thank you very much, cpad0112. You know how grateful I am to you. You have been helping me more than my co-supervisors (even though we don't know each other).
Thank you very much!!!

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

1

Entering edit mode

Code above contains unnecessary steps. I cleaned up code a little bit. Starting df (from OP code), try this:

library(dplyr)
library(tidyr)
library(textshape)
library(gplots)

# Build a genes x groups matrix-like data frame of replicate means:
#   1. reshape to long format (one row per gene and sample column),
#   2. split the column name into sample / time / replicate,
#   3. re-join sample and time into one group label (replicate is dropped),
#   4. average the replicates per gene and group,
#   5. spread back to wide format and move Gene into the row names.
gdf1 <- gather(df1, "group", "Expression", -Gene) %>%
    separate(group, c("sample", "time", "r")) %>%
    unite(tgroup, c("sample", "time")) %>%
    group_by(Gene, tgroup) %>%
    summarize(expression_mean = mean(Expression)) %>%
    spread(tgroup, expression_mean) %>%
    column_to_rownames(colnames(.)[1])


# 30-step red -> yellow -> dark green palette.
col <- colorRampPalette(c("red", "yellow", "darkgreen"))(30)

# Rowv = FALSE keeps the genes in data-frame order; only the columns are
# clustered and only the column dendrogram is drawn.
heatmap.2(
    as.matrix(gdf1),
    Rowv = FALSE,
    dendrogram = "column",
    col = col,
    margins = c(7, 10),
    trace = "none",
    density.info = "none",
    # Draw a thin separator after every column, not just the first three.
    colsep = seq_len(ncol(gdf1)),
    sepcolor = "black",
    sepwidth = c(0.001, 0.001),
    xlab = "Identifier",
    ylab = "Genes"
)

But this misses relationship between genes.

ps: could you please remove formatting at the end of your post above?

ADD REPLY • link 6.6 years ago by cpad0112 21k

0

Entering edit mode

@cpad0112, I have several experimental conditions and am going to plot the heatmaps in the same way; at this stage I am only interested in how my genotypes cluster under these experimental conditions. The only thing I can do is thank you. Thank you, as always.

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

0

Entering edit mode

@cpad0112, I've tried it the same way, but why is my color key not appearing? I even added key=TRUE, keysize=1.0, key.xlab = "Ln(Peak Area / T0)", key.ylab="", key.title="" to try!

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

1

Entering edit mode

@OP: What is your matrix size (dimensions)? What do you mean by "color key is not appearing"? Does the heatmap render well with colors, but the colors in the color key are not rendered (i.e. no color, or white)? Or is the color key itself missing?

ADD REPLY • link 6.6 years ago by cpad0112 21k

0

Entering edit mode

I am only playing with this df before analyzing the real data; it is just a replicate of your code. No, only the color key is missing. I get the heatmap without it.

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

1

Entering edit mode

Try the code in R, not in RStudio, and see if it gives you the color key. By the way, is there any reason why the first columns are highlighted in the heatmap in your code? Also, clear all the graphic devices (with something like dev.off()).

ADD REPLY • link 6.6 years ago by cpad0112 21k

0

Entering edit mode

Hi cpad0112. No, there is no reason to highlight the first columns; that is just the output this script gave, and I do not know how to fix it. I've also tried the following code for the heatmap, but it doesn't give the color key either :(

# 100-step light yellow -> red palette shared by all plots below.
scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100)

# Attempt 1: base heatmap() without any clustering.
# Note: stats::heatmap() has no colour-key option; only gplots::heatmap.2()
# can draw one.
heatmap(as.matrix(tydf1), Rowv = NA, Colv = NA, col = scaleyellowred)

# Attempt 2: base heatmap() with rows clustered on Bray-Curtis distance.
data.dist <- vegdist(tydf1, method = "bray")
row.clus <- hclust(data.dist, "average")
heatmap(
    as.matrix(tydf1),
    Rowv = as.dendrogram(row.clus),
    Colv = NA,
    col = scaleyellowred,
    margins = c(10, 3)
)

# Attempt 3: base heatmap() with both rows and columns clustered.
data.dist.g <- vegdist(t(tydf1), method = "bray")
col.clus <- hclust(data.dist.g, "average")
heatmap(
    as.matrix(tydf1),
    Rowv = as.dendrogram(row.clus),
    Colv = as.dendrogram(col.clus),
    col = scaleyellowred,
    margins = c(10, 3)
)

# Attempt 4: heatmap.2() with both dendrograms.
heatmap.2(
    as.matrix(tydf1),
    Rowv = as.dendrogram(row.clus),
    Colv = as.dendrogram(col.clus),
    col = scaleyellowred,
    margins = c(8, 10),
    trace = "none",
    density.info = "none",
    xlab = "genera",
    ylab = "Samples",
    main = "Heatmap example",
    lhei = c(2, 8)
)

# Attempt 5: heatmap.2() with rows clustered and columns left in place.
# NOTE(review): lhei = c(0.25, 4) leaves very little height for the top
# layout row, which is where heatmap.2() places the colour key; this may be
# squeezing the key out of the plot. Confirm by trying a larger first value.
heatmap.2(
    as.matrix(tydf1),
    Colv = FALSE,
    lhei = c(0.25, 4),
    density.info = "none",
    scale = "none",
    margins = c(10, 10),
    col = scaleyellowred,
    symkey = FALSE,
    trace = "none",
    dendrogram = "row",
    keysize = 1.0,
    key.xlab = "Ln(Peak Area / T0)",
    key.ylab = "",
    key.title = ""
)

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

1

Entering edit mode

Please follow the steps below without code modification except for working directory modification and let me know your feed back @OP:

1. setwd("~/Desktop/") ## this sets the working directory. You can change it.
2. Create a data frame as you posted in the very first post. (Please do not modify the df.)
3. Run the code that I posted. The code fix for highlighting the first 3 samples is included. Before running the code, do steps 4 and 5.
4. Before the line starting with heatmap.2, insert a new line with the following code: png("test.png")
5. At the end of the code (i.e. after the line ylab = "Genes")), include dev.off().
6. Execute the code, open test.png, and let me know if it has a color key.

Example code for steps 4 and 5:

# Open a PNG graphics device so the plot is written to test.png in the
# working directory instead of the on-screen plot window.
png("test.png")
heatmap.2(
    as.matrix(gdf1),
    Rowv = FALSE,
    dendrogram = "column",
    col = col,
    margins = c(7, 10),
    trace = "none",
    density.info = "none",
    # Separator after every column.
    colsep = seq_len(ncol(gdf1)),
    sepcolor = "black",
    sepwidth = c(0.001, 0.001),
    xlab = "Identifier",
    ylab = "Genes"
)
# Close the device; the file is only complete once the device is shut down.
dev.off()

ADD REPLY • link 6.6 years ago by cpad0112 21k

0

Entering edit mode

Oh Yes! This has the colorkey :D . wow, thanks cpad0112. BTW, was that an issue from my working directory? I am working on uni computer. So my working folders are networked (not local). Also, what is the purpose of this dev.off() syntax? Thank you very much. (now I feel you fed up with my thankings.... ), but I must say thank you!!!

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

1

Entering edit mode

There is no issue with anything, including the code. My understanding is that you are using an R front end (maybe RStudio) and the graphics window is either too small or not able to render the image. This is the reason why, in my earlier post, I suggested R (from the terminal) instead of an R front end. Now please work on the png part. You probably want a publication-grade image (300 dpi - high resolution; 72 dpi - minimum resolution required for publication/presentation). dev.off() shuts down the graphics device (in plain terms, the plot file or plot window). In the case above, you opened a graphics device (as png), and it needs to be shut down as soon as the plotting is done.

ADD REPLY • link 6.6 years ago by cpad0112 21k

0

Entering edit mode

Thank you very much cpad0112.

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

1

Entering edit mode

@OP: If issue in OP is addressed, accept it as answer, for future reference.

ADD REPLY • link 6.6 years ago by cpad0112 21k

0

Entering edit mode

Hi cpad0112, again back to you. I'm interested in clustering only the genes (rows), I want to keep the order of my samples as same as in the df. I've used below scripts (of course edited yours :D )

# Write the heatmap to test6.png: rows (genes) clustered, columns not.
png("test6.png")
heatmap.2(
    as.matrix(gdf1),
    Rowv = TRUE,
    Colv = NA,
    dendrogram = "row",
    # scale = "row",
    col = scaleRYG,
    density.info = "none",
    trace = "none",
    margins = c(7, 15)
)
dev.off()

However, in my working df the samples are not named X/Y/Z. They have different names, i.e. "WT", "aba1", "aba2", and in the heatmap the columns are rearranged into alphabetical order, i.e. "aba1", "aba2", "WT". How can I stop this? I want to have my wild-type samples on the left. Sorry to bother you! If you have free time, please help me.

ADD REPLY • link 6.6 years ago by WUSCHEL ▴ 850

0

Entering edit mode

Try using Colv = FALSE instead of Colv = NA in the code, and reorder your matrix before plotting. Using the original data:

> # Same pipeline as before, but with samples X/Y/Z renamed to WT/aba1/aba2
> # before the group labels are built.
> gdf1 <- gather(df1, "group", "Expression", -Gene) %>%
     separate(group, c("sample", "time", "r")) %>%
     mutate(sample = recode(sample, "X" = "WT", "Y" = "aba1", "Z" = "aba2")) %>%
     unite(tgroup, c("sample", "time")) %>%
     group_by(Gene, tgroup) %>%
     summarize(expression_mean = mean(Expression)) %>%
     spread(tgroup, expression_mean) %>%
     column_to_rownames(colnames(.)[1])

Changed the names from X, Y, Z to WT, aba1 and aba2 in the data frame.

> # Order columns by sample name rather than by position: spread() sorts the
> # group columns, and that sort order can differ between locales, so fixed
> # positions such as c(9:12, 1:8) are not reliable. order() is stable, so the
> # T0..T3 order within each sample is preserved.
> grp <- sub("_.*$", "", colnames(gdf1))
> cn <- colnames(gdf1)[order(match(grp, c("WT", "aba1", "aba2")))]
> cn

 [1] "WT_T0"   "WT_T1"   "WT_T2"   "WT_T3"   "aba1_T0" "aba1_T1" "aba1_T2" "aba1_T3"
 [9] "aba2_T0" "aba2_T1" "aba2_T2" "aba2_T3"

Created the column order I would like to see: WT first, aba1 next, followed by aba2. Stored the values in cn.

Now plotting:

# 30-step red -> yellow -> dark green palette.
col <- colorRampPalette(c("red", "yellow", "darkgreen"))(30)

# Cluster only the rows (genes) and keep the columns exactly in the order
# given by cn:
#   - Rowv = TRUE with dendrogram = "row" clusters the genes and draws their
#     tree,
#   - Colv = FALSE switches off column reordering, so the order of the
#     input matrix columns is what appears on the x axis.
heatmap.2(
    as.matrix(gdf1[, cn]),
    Rowv = TRUE,
    Colv = FALSE,
    col = col,
    margins = c(7, 10),
    trace = "none",
    density.info = "none",
    # Separators at every column boundary, including the left edge (0).
    colsep = 0:ncol(gdf1),
    sepcolor = "black",
    sepwidth = c(0.001, 1),
    xlab = "Identifier",
    ylab = "Genes",
    dendrogram = "row"
)

After ordering:

@OP: This thread is getting out of hand. If there are further questions unrelated to OP, could you please start a new thread/post? You can link to this post if you wish.