Filtering, summarizing, and plotting from a big data frame
2
0
Entering edit mode
5.9 years ago
WUSCHEL ▴ 810

Hi, I have a big data frame for omics data. Samples are named as Genotype_Time_Replicate (e.g. AOX_1h_4).

E.g. data set

structure(list(ID = c("AT5G54740.1", "AT5G55730.2", "AT5G57655.2", "AT5G64100.1", "AT5G64260.1", "AT5G67360.1", "AT1G30630.1", "AT1G62380.1", "AT1G70830.1", "AT3G14990.1", "AT4G18800.1", "AT4G24510.1", "AT5G15650.1", "AT5G19820.1", "AT5G59840.1", "AT5G47200.1", "AT1G12840.1", "AT1G76030.1", "AT1G78900.2", "AT3G42050.1", "AT4G11150.1", "AT1G11860.2", "AT1G17290.1" ), Location = c("extracellular", "extracellular", "extracellular", "extracellular", "extracellular", "extracellular", "golgi", "golgi", "golgi", "golgi", "golgi", "golgi", "golgi", "golgi", "golgi", "ER", "ER", "ER", "mitochondrion", "mitochondrion", "mitochondrion", "mitochondrion", "mitochondrion"), AOX_1h_1 = c(0.844651873, 0.50954096, 1.12e-08, 0.012981372, 0.978148381, 0.027579578, 0.068010151, 0.410629215, 0.253838635, 0.033631788, 0.335713512, 0.982799013, 0.025910457, 0.793810264, 0.762431665, 0.152154436, 0.027114103, 0.000227, 1.07e-05, 0.721209032, 0.086281162, 0.483130711, 0.014795515), AOX_1h_2 = c(0.894623378, 0.011521413, 1.62e-06, 0.085249729, 0.02863972, 0.956962154, 0.225208718, 0.932679767, 0.002574192, 0.071700671, 0.233682544, 0.936572874, 1.12e-05, 0.241658735, 0.865205515, 0.000537, 0.103471292, 8.66e-07, 1.22e-08, 0.950878446, 0.145012176, 0.092919172, 0.599713247), AOX_1h_3 = c(0.880951025, 0.00145276, 8.59e-10, 0.087023475, 0.675527672, 0.765543306, 0.305860948, 0.899172011, 0.020973476, 0.542988545, 0.735571562, 0.157569324, 0.025488075, 0.071006507, 0.262324019, 0.080470612, 0.0436526, 6.65e-09, 5.63e-10, 0.020557091, 0.069577215, 0.005502212, 0.852099232), AOX_1h_4 = c(0.980823252, 0.158123518, 0.00210702, 0.006317657, 0.30496173, 0.489709702, 0.091469807, 0.958443361, 0.015583593, 0.566165972, 0.66746161, 0.935102341, 0.087733288, 0.744313619, 0.021169383, 0.633250945, 0.257489406, 0.024345088, 0.000355, 0.226279179, 0.004038493, 0.479275204, 0.703522761), AOX_2h_1 = c(0.006474022, 0.246530998, 5.38e-06, 0.47169153, 0.305973663, 0.466202566, 0.191733645, 0.016121487, 
0.234839116, 0.043866023, 0.089819656, 0.107934599, 2.09e-06, 0.413229678, 0.464078018, 0.004118766, 0.774970986, 3.79e-07, 2.3e-10, 0.428591262, 0.002326292, 0.385580707, 0.106216066), AOX_2h_2 = c(0.166169729, 0.005721199, 7.77e-08, 0.099146712, 0.457164663, 0.481987525, 7.4e-05, 0.969805081, 0.100894997, 0.062103337, 0.095718425, 0.001686206, 0.009710516, 0.134651787, 0.887036569, 0.459218152, 0.074576369, 3.88e-09, 3.31e-15, 0.409645805, 0.064874307, 0.346371524, 0.449444779), AOX_2h_3 = c(1.06e-05, 0.576589898, 4.03e-08, 0.787468189, 0.971119601, 0.432593753, 0.000274, 0.86932399, 0.08657663, 4.22e-06, 0.071190008, 0.697384316, 0.161623604, 0.422628778, 0.299545652, 0.767867006, 0.00295567, 0.078724176, 4.33e-09, 0.988576028, 0.080278831, 0.66505527, 0.014158693), AOX_2h_4 = c(0.010356719, 0.026506539, 9.48e-09, 0.91009296, 0.302464488, 0.894377768, 0.742233323, 0.75032613, 0.175841127, 0.000721, 0.356904918, 0.461234653, 1.08e-05, 0.65800831, 0.360085919, 0.004814238, 0.174670947, 0.004246734, 7.31e-11, 0.778725214, 0.051334623, 0.10212841, 0.155831664 ), AOX_6h_1 = c(0.271681878, 0.004822226, 1.87e-11, 0.616969208, 0.158860224, 0.684690326, 0.011798791, 0.564591916, 0.000314, 4.79e-06, 0.299871385, 0.001909713, 0.00682428, 0.039107415, 0.574143284, 0.061532691, 0.050483892, 2.28e-08, 1.92e-12, 0.058747794, 0.027147473, 0.196608218, 0.513693112), AOX_6h_2 = c(5.72e-12, 0.719814288, 0.140016259, 0.927094438, 0.841229414, 0.224510089, 0.026567282, 0.242981965, 0.459311076, 0.038295888, 0.127935565, 0.453746728, 0.005023732, 0.554532387, 0.280899096, 0.336458018, 0.002024021, 0.793915731, 0.012838565, 0.873716549, 0.10097853, 0.237426815, 0.003711539), AOX_6h_3 = c(3.16e-12, 0.780424491, 0.031315419, 0.363891436, 0.09562579, 0.104833988, 3.52e-05, 0.104196756, 0.870952423, 0.002036134, 0.016480622, 0.671475063, 2.3e-05, 0.00256744, 0.66263641, 0.005026601, 0.57280276, 0.058724117, 6.4e-10, 0.030965264, 0.005301006, 0.622027012, 0.371659724), AOX_6h_4 = 
c(7.99e-10, 0.290847169, 0.001319424, 0.347344795, 0.743846306, 0.470908425, 0.00033, 0.016149973, 0.080036584, 0.020899676, 0.00723071, 0.187288769, 0.042514886, 0.00150443, 0.059344154, 0.06554177, 0.112601764, 0.000379, 2.36e-10, 0.78131093, 0.105861995, 0.174370801, 0.05570041 ), WT_1h_1 = c(0.857, 0.809, 2.31e-05, 0.286, 0.87, 0.396, 0.539, 0.787, 0.73, 0.427, 0.764, 0.87, 0.386, 0.852, 0.848, 0.661, 0.393, 0.0415, 0.00611, 0.843, 0.576, 0.804, 0.304 ), WT_1h_2 = c(0.898, 0.509, 0.0192, 0.729, 0.616, 0.902, 0.811, 0.9, 0.343, 0.712, 0.814, 0.901, 0.0446, 0.816, 0.896, 0.217, 0.747, 0.0143, 0.000964, 0.901, 0.776, 0.737, 0.876 ), WT_1h_3 = c(0.939, 0.627, 0.0104, 0.867, 0.932, 0.935, 0.91, 0.939, 0.803, 0.926, 0.934, 0.888, 0.813, 0.859, 0.905, 0.864, 0.838, 0.0223, 0.00917, 0.802, 0.858, 0.724, 0.938 ), WT_1h_4 = c(0.911, 0.782, 0.298, 0.396, 0.837, 0.871, 0.727, 0.91, 0.506, 0.88, 0.89, 0.909, 0.723, 0.896, 0.547, 0.887, 0.824, 0.566, 0.175, 0.814, 0.348, 0.869, 0.893), WT_2h_1 = c(0.748, 0.911, 0.231, 0.929, 0.917, 0.928, 0.903, 0.801, 0.909, 0.849, 0.878, 0.884, 0.183, 0.925, 0.928, 0.719, 0.941, 0.108, 0.00817, 0.926, 0.678, 0.923, 0.884), WT_2h_2 = c(0.935, 0.851, 0.163, 0.925, 0.951, 0.952, 0.63, 0.963, 0.926, 0.916, 0.925, 0.804, 0.868, 0.931, 0.961, 0.951, 0.92, 0.0706, 0.000265, 0.95, 0.917, 0.947, 0.951), WT_2h_3 = c(0.0197, 0.894, 0.000613, 0.911, 0.922, 0.877, 0.122, 0.916, 0.739, 0.0125, 0.718, 0.905, 0.801, 0.875, 0.852, 0.91, 0.302, 0.729, 0.00015, 0.923, 0.731, 0.902, 0.504), WT_2h_4 = c(0.696, 0.765, 0.0142, 0.931, 0.893, 0.931, 0.925, 0.925, 0.87, 0.45, 0.899, 0.908, 0.144, 0.921, 0.899, 0.631, 0.87, 0.62, 0.0014, 0.926, 0.807, 0.844, 0.865), WT_6h_1 = c(0.898, 0.727, 0.00395, 0.921, 0.881, 0.924, 0.776, 0.919, 0.542, 0.234, 0.901, 0.67, 0.747, 0.83, 0.919, 0.848, 0.841, 0.056, 0.00144, 0.846, 0.815, 0.888, 0.916), WT_6h_2 = c(2.38e-09, 0.88, 0.708, 0.898, 0.891, 0.768, 0.443, 0.777, 0.843, 0.505, 0.695, 0.842, 0.208, 0.859, 0.794, 0.813, 
0.14, 0.887, 0.326, 0.894, 0.661, 0.775, 0.182), WT_6h_3 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), WT_6h_4 = c(0.0357, 0.953, 0.792, 0.956, 0.967, 0.96, 0.711, 0.892, 0.931, 0.899, 0.866, 0.946, 0.917, 0.799, 0.925, 0.927, 0.938, 0.72, 0.025, 0.967, 0.936, 0.945, 0.923)), class = "data.frame", row.names = c(NA, -23L))

I want to summarize data for each organelle (averaged by organelle and samples' replicates) and plot the Wildtype and mutant data side by side with standard error for each time point.

E.g. Plot
https://ibb.co/f0pb17r (bar / dot plot)

I would also like to make a second plot showing the fold change of the mutant compared to its reference WT (e.g. AOX_1h_4/WT_1h_4).

How can I use Tidyverse / relevant R package for averaging the organelle and replicate data in this kind of DF for downstream analysis?

R RNA-Seq gene proteomics • 1.4k views
ADD COMMENT
5
Entering edit mode
5.8 years ago
Prakash ★ 2.2k

See if this helps.

library(ggplot2)
library(ggpubr)
library(reshape2)

# `a` is the object created from the structure(...) call in the question.
# Binding its elements column-wise gives a plain data frame with the ID and
# Location columns plus one column per sample (Genotype_Time_Replicate).
df <- do.call(cbind.data.frame, a)

# Reshape to long format: one row per gene x sample. The identifier columns
# are named explicitly so melt() does not have to guess them.
melted <- melt(df, id.vars = c("ID", "Location"))
head(melted)

# Strip the trailing replicate suffix ("AOX_1h_4" -> "AOX_1h") so replicates
# of the same genotype/time point fall into one group. Base sub() is used
# because stringr is not loaded by this script; "[0-9]+" also copes with
# replicate numbers of more than one digit.
melted$variable <- sub("_[0-9]+$", "", melted$variable)

# Fix the display order so each WT bar sits directly beside its AOX partner.
melted$variable <- factor(
  melted$variable,
  levels = c("WT_1h", "AOX_1h", "WT_2h", "AOX_2h", "WT_6h", "AOX_6h")
)

# Pairs to compare: mutant vs. wild type within each time point.
my_comparisons <- list(
  c("WT_1h", "AOX_1h"),
  c("WT_2h", "AOX_2h"),
  c("WT_6h", "AOX_6h")
)

# One panel per Location; each bar is the mean (with standard-error bar) over
# all genes and replicates of that genotype/time point in that Location.
ggbarplot(
  melted,
  x = "variable", y = "value", add = "mean_se",
  color = "variable",
  palette = c("grey", "black", "grey", "black", "grey", "black"),
  facet.by = "Location"
) +
  stat_compare_means(comparisons = my_comparisons, label = "p.signif")

enter image description here

https://ibb.co/XDXwGTv

ADD COMMENT
0
Entering edit mode

Thank you very much, Prakash.

ADD REPLY
1
Entering edit mode
5.8 years ago

It seems to me that you're looking for the aggregate() function. There's also the summarize() function in the Hmisc package.
EDIT: For more ideas, check this blog post.

ADD COMMENT
0
Entering edit mode

Thank you very much Heriche.

ADD REPLY

Login before adding your answer.

Traffic: 1896 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6