How can I separate column names in to different columns to perform downstream analysis?
2
1
Entering edit mode
6.5 years ago
WUSCHEL ▴ 810

I have a big data matrix and each column has named with multiple information and separated by an underscore. e.g.: Genotype, Tissue, Time, Treatment, and Replication (e.g.WT_Shoot_0t_NTrt_1)

A sample of my data frame;

structure(list(Proteins = c("SnrK", "MAPKK", "PP2C"), WT_Shoot_0t_NTrt_1 = c(0.580784899, 1.210078166, 1.505880218), WT_Shoot_0t_NTrt_2 = c(0.957816536, 1.42644091, 0.943047498), WT_Shoot_0t_NTrt_3 = c(0.559338535, 1.481513748, 1.114371918), WT_Shoot_1t_Trt_1 = c(0.831382253, 1.478551276, 0.837832395), WT_Shoot_1t_Trt_2 = c(1.180515054, 1.445100969, 1.18151722), WT_Shoot_1t_Trt_3 = c(1.332735497, 1.515484415, 0.99774335), WT_root_0t_NTrt_1 = c(1.717008073, 2.048229681, 1.448233358), WT_root_0t_NTrt_2 = c(1.431501693, 1.850835296, 1.128499829), WT_root_0t_NTrt_3 = c(1.752086402, 2.047380811, 1.190984777), WT_root_1t_Trt_1 = c(1.368684187, 1.507348975, 1.531142731), WT_root_1t_Trt_2 = c(1.204974777, 1.440904968, 1.103257306), WT_root_1t_Trt_3 = c(0.996016342, 1.630774074, 1.141581901), mut1_Shoot_0t_NTrt_1 = c(1.05451186, 1.916352545, 1.030983014), mut1_Shoot_0t_NTrt_2 = c(1.54792871, 1.676837161, 1.244400719), mut1_Shoot_0t_NTrt_3 = c(1.318611728, 1.613611, 1.28740667), mut1_Shoot_1t_Trt_1 = c(1.551790106, 1.619609895, 1.097308351), mut1_Shoot_1t_Trt_2 = c(1.638951097, 1.437759761, 1.139143972), mut1_Shoot_1t_Trt_3 = c(1.18670455, 1.530006726, 1.583110853), mut1_root_0t_NTrt_1 = c(0.981436287, 0.5156177, 0.799418798), mut1_root_0t_NTrt_2 = c(1.143837649, 0.772921721, 1.098218628), mut1_root_0t_NTrt_3 = c(1.163352788, 1.371823855, 1.278531528), mut1_root_1t_Trt_1 = c(1.13334394, 0.768721169, 1.155071974), mut1_root_1t_Trt_2 = c(1.015317761, 0.838696502, 0.9622491), mut1_root_1t_Trt_3 = c(1.961461109, 0.697184247, 0.926734427)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"), spec = structure(list(cols = list(Proteins = structure(list(), class = c("collector_character", "collector")), WT_Shoot_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_3 = structure(list(), class = c("collector_double", "collector"))), default = structure(list(), class = c("collector_guess", "collector"))), class = "col_spec"))

How can I make a table to like below to process downstream statistical analysis (i.e. ANOVA, Tukey) Sample Table

R gene RNA-Seq • 1.7k views
ADD COMMENT
6
Entering edit mode
6.5 years ago
Chirag Parsania ★ 2.0k

Convert from wide to long format, then separate delimited strings to new columns.

library(tidyverse)
dd <- structure(list(Proteins = c("SnrK", "MAPKK", "PP2C"), WT_Shoot_0t_NTrt_1 = c(0.580784899, 1.210078166, 1.505880218), WT_Shoot_0t_NTrt_2 = c(0.957816536, 1.42644091, 0.943047498), WT_Shoot_0t_NTrt_3 = c(0.559338535, 1.481513748, 1.114371918), WT_Shoot_1t_Trt_1 = c(0.831382253, 1.478551276, 0.837832395), WT_Shoot_1t_Trt_2 = c(1.180515054, 1.445100969, 1.18151722), WT_Shoot_1t_Trt_3 = c(1.332735497, 1.515484415, 0.99774335), WT_root_0t_NTrt_1 = c(1.717008073, 2.048229681, 1.448233358), WT_root_0t_NTrt_2 = c(1.431501693, 1.850835296, 1.128499829), WT_root_0t_NTrt_3 = c(1.752086402, 2.047380811, 1.190984777), WT_root_1t_Trt_1 = c(1.368684187, 1.507348975, 1.531142731), WT_root_1t_Trt_2 = c(1.204974777, 1.440904968, 1.103257306), WT_root_1t_Trt_3 = c(0.996016342, 1.630774074, 1.141581901), mut1_Shoot_0t_NTrt_1 = c(1.05451186, 1.916352545, 1.030983014), mut1_Shoot_0t_NTrt_2 = c(1.54792871, 1.676837161, 1.244400719), mut1_Shoot_0t_NTrt_3 = c(1.318611728, 1.613611, 1.28740667), mut1_Shoot_1t_Trt_1 = c(1.551790106, 1.619609895, 1.097308351), mut1_Shoot_1t_Trt_2 = c(1.638951097, 1.437759761, 1.139143972), mut1_Shoot_1t_Trt_3 = c(1.18670455, 1.530006726, 1.583110853), mut1_root_0t_NTrt_1 = c(0.981436287, 0.5156177, 0.799418798), mut1_root_0t_NTrt_2 = c(1.143837649, 0.772921721, 1.098218628), mut1_root_0t_NTrt_3 = c(1.163352788, 1.371823855, 1.278531528), mut1_root_1t_Trt_1 = c(1.13334394, 0.768721169, 1.155071974), mut1_root_1t_Trt_2 = c(1.015317761, 0.838696502, 0.9622491), mut1_root_1t_Trt_3 = c(1.961461109, 0.697184247, 0.926734427)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"), spec = structure(list(cols = list(Proteins = structure(list(), class = c("collector_character", "collector")), WT_Shoot_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_3 = structure(list(), class = c("collector_double", "collector"))), default = structure(list(), class = c("collector_guess", "collector"))), class = "col_spec"))
==========
dd %>% 
  gather(var, response, WT_Shoot_0t_NTrt_1:mut1_root_1t_Trt_3) %>% 
  separate(var, c("Genotype", "Tissue", "Time", "Trtment", "Replication"), sep = "_") %>%
  arrange(desc(Proteins))


# A tibble: 72 x 7
   Proteins Genotype Tissue Time  Trtment Replication response
   <chr>    <chr>    <chr>  <chr> <chr>   <chr>          <dbl>
 1 SnrK     WT       Shoot  0t    NTrt    1              0.581
 2 SnrK     WT       Shoot  0t    NTrt    2              0.958
 4 SnrK     WT       Shoot  1t    Trt     1              0.831
 5 SnrK     WT       Shoot  1t    Trt     2              1.18

UPDATE

with tidyr 1.0.0 no need to use separate

dd %>% tidyr::pivot_longer(cols = WT_Shoot_0t_NTrt_1:mut1_root_1t_Trt_3 ,
                    names_to = c("Genotype", "Tissue", "Time", "Trtment", "Replication"),
                    values_to = "response",names_sep = "_")
ADD COMMENT
1
Entering edit mode

The script looks tempting. But can the transformed tibble be converted into a data.frame in r?

ADD REPLY
0
Entering edit mode

Yes, using as.data.frame()

ADD REPLY
0
Entering edit mode

Thank you, Chirag! This is what I exactly looking for. Thank a lot :)

ADD REPLY
2
Entering edit mode
6.5 years ago
tokeemdtareq ▴ 40

I am relatively new in data analysis, maybe there is another faster way. But as a fellow biologist, I thought it would be good idea to represent a customized answer to the problem. I loaded your data as a data.frame in R, naming dat1.

dat1 = as.data.frame(structure(list(Proteins = c("SnrK", "MAPKK", "PP2C"), WT_Shoot_0t_NTrt_1 = c(0.580784899, 1.210078166, 1.505880218), WT_Shoot_0t_NTrt_2 = c(0.957816536, 1.42644091, 0.943047498), WT_Shoot_0t_NTrt_3 = c(0.559338535, 1.481513748, 1.114371918), WT_Shoot_1t_Trt_1 = c(0.831382253, 1.478551276, 0.837832395), WT_Shoot_1t_Trt_2 = c(1.180515054, 1.445100969, 1.18151722), WT_Shoot_1t_Trt_3 = c(1.332735497, 1.515484415, 0.99774335), WT_root_0t_NTrt_1 = c(1.717008073, 2.048229681, 1.448233358), WT_root_0t_NTrt_2 = c(1.431501693, 1.850835296, 1.128499829), WT_root_0t_NTrt_3 = c(1.752086402, 2.047380811, 1.190984777), WT_root_1t_Trt_1 = c(1.368684187, 1.507348975, 1.531142731), WT_root_1t_Trt_2 = c(1.204974777, 1.440904968, 1.103257306), WT_root_1t_Trt_3 = c(0.996016342, 1.630774074, 1.141581901), mut1_Shoot_0t_NTrt_1 = c(1.05451186, 1.916352545, 1.030983014), mut1_Shoot_0t_NTrt_2 = c(1.54792871, 1.676837161, 1.244400719), mut1_Shoot_0t_NTrt_3 = c(1.318611728, 1.613611, 1.28740667), mut1_Shoot_1t_Trt_1 = c(1.551790106, 1.619609895, 1.097308351), mut1_Shoot_1t_Trt_2 = c(1.638951097, 1.437759761, 1.139143972), mut1_Shoot_1t_Trt_3 = c(1.18670455, 1.530006726, 1.583110853), mut1_root_0t_NTrt_1 = c(0.981436287, 0.5156177, 0.799418798), mut1_root_0t_NTrt_2 = c(1.143837649, 0.772921721, 1.098218628), mut1_root_0t_NTrt_3 = c(1.163352788, 1.371823855, 1.278531528), mut1_root_1t_Trt_1 = c(1.13334394, 0.768721169, 1.155071974), mut1_root_1t_Trt_2 = c(1.015317761, 0.838696502, 0.9622491), mut1_root_1t_Trt_3 = c(1.961461109, 0.697184247, 0.926734427)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"), spec = structure(list(cols = list(Proteins = structure(list(), class = c("collector_character", "collector")), WT_Shoot_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), WT_Shoot_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), WT_root_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), WT_root_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), mut1_Shoot_1t_Trt_3 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_1 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_2 = structure(list(), class = c("collector_double", "collector")), mut1_root_0t_NTrt_3 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_1 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_2 = structure(list(), class = c("collector_double", "collector")), mut1_root_1t_Trt_3 = structure(list(), class = c("collector_double", "collector"))), default = structure(list(), class = c("collector_guess", "collector"))), class = "col_spec")))

Here goes my code, your desired output is given as the dat4

library(reshape2)
dat2 = melt(dat1, value.name = "measurement")
dat2$sl = c(1:72)
dat3 = data.frame(do.call('rbind', strsplit(as.character(dat2$variable), '_', fixed = F)))
colnames(dat3) = c("genotype", "tissue", "time", "treatment", "replication")
dat3$sl = c(1:72)
dat4 = subset(merge(dat2, dat3, by = "sl"), select = c("Proteins", "genotype", "tissue", "time", "treatment", "replication", "measurement"))
ADD COMMENT
0
Entering edit mode

Thank you for the help!

ADD REPLY

Login before adding your answer.

Traffic: 1694 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6