Hello Biostars Community,
I have a visualization problem in R (using the ggplot2 library). My data has one column per sample, named by fermenter (F1, F2, or F3) and time point (T0, W1, W5, etc.), for example F1_T0. For each sample, I have a taxonomic distribution. The data (as `dput` output) looks like this:
structure(list(Phyla = c("Actinobacteria", "Actinobacteria",
"Bacteroidia", "Bacteroidia", "Bacteroidia", "Bacteroidia", "Bacteroidia",
"Bacteroidia", "Bacteroidia", "Bacteroidia", "Bacteroidia", "Firmicutes",
"Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes",
"Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes",
"Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes",
"Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes", "Firmicutes",
"Methanobacteria", "Methanomicrobia", "Methanomicrobia", "Methanomicrobia",
"Methanomicrobia", "Proteobacteria", "Synergistetes", "Tenericutes"
), Taxa = c("Propionibacteriaceae ", "Propionimicrobium sp000413315",
"Pelotomaculum sp. DTU098", "Bacteroidales", "UBA7646 sp002411385",
"Bacteroidales", "Marinilabiliaceae", "UBA1402 sp002305085",
"UBA1179 sp002340405", "Dysgonomonadaceae", "SR-FBR-E99 sp002409145",
"Clostridia", "Clostridia", "DTU014 sp900016865", "Clostridiales bacterium DTU012",
"DTU010 sp900018335", "UBA1062 sp001896555", "CAG-74", "DTU010",
"Lenti-01 sp002304915", "Herbinix luporum", "UBA3906 sp002391555",
"UBA3943 sp002385625", "Firmicutes", "RUG521 sp900319675", "UBA2557 sp900019985",
"UBA4923 sp002427535", "Clostridia", "UBA4923 sp002427535", "UBA1361 sp002306335",
"UBA4175 sp002379855", "Clostridiales bacterium DTU010", "Methanobacterium sp002496805",
"Methanothrix soehngenii ", "Methanosarcina mazei", "Methanosarcina flavescens",
"Methanoculleus bourgensis", "Thiopseudomonas denitrificans",
"Acetomicrobium flavidum", "Mollicutes"), F1_T0 = c(NA, NA, NA,
NA, NA, 1.32224538, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.401235467,
NA, NA, NA, 1.625362358, 1.301532511, NA, NA, 1.714799299, NA,
NA, 1.287748037, 1.810523145, 1.504879518, NA, 1.492576565, 1.27245986,
1.416039464, NA, 1.36717996, NA, 1.98274309, 1.521726277, NA,
NA), F1_W1 = c(NA, NA, 1.534940644, NA, 1.634194351, 1.35090771,
NA, 1.628195387, 1.778196127, 1.401069176, 1.593599712, 1.986216267,
1.428164135, 1.430153888, NA, 1.402850974, NA, NA, NA, 1.60350853,
1.287522991, 1.542275391, NA, NA, 1.506279733, 1.584583327, 1.254971574,
1.776536245, 1.456327925, 1.752640377, 1.500469439, 1.302754491,
1.428997648, NA, 1.344208705, NA, 1.972330771, NA, 1.865354176,
NA), F1_W5 = c(1.384749898, NA, NA, NA, NA, NA, NA, 1.641458659,
1.854157038, NA, NA, NA, NA, 1.543863735, NA, 1.592487242, NA,
NA, NA, 1.852434696, NA, NA, NA, NA, NA, NA, NA, NA, 1.57838521,
NA, 1.728172432, 1.554899366, 1.621028946, NA, NA, NA, NA, NA,
NA, NA), F1_W9 = c(1.286435019, NA, NA, NA, NA, NA, NA, 1.587678678,
1.800616934, NA, NA, NA, NA, NA, NA, 1.451352126, 1.489037393,
1.52182326, NA, 1.707602644, 1.377020997, NA, NA, NA, NA, 1.601946774,
NA, NA, 1.553749325, 1.754574724, NA, 1.377190846, 1.505873722,
1.509802123, NA, NA, NA, NA, 1.916735633, 2.432302764), F1_W12 = c(1.348047863,
1.85429967, NA, 1.523515128, NA, NA, NA, 1.757121867, 1.826724859,
NA, NA, NA, NA, NA, NA, 1.491285999, 1.52254006, 1.611882468,
1.671598949, 1.811125548, 1.433493234, 1.60765422, NA, 1.902597225,
NA, 1.650771973, NA, NA, NA, NA, NA, 1.464168234, 1.502836367,
1.402456566, NA, NA, NA, NA, NA, 2.435835198), F2_T0 = c(NA,
NA, NA, NA, NA, 1.35777688, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1.436959196, NA, NA, NA, 1.665121152, 1.330635006, NA, NA, 1.925552538,
NA, NA, 1.35606217, NA, 1.545698654, NA, 1.536684622, 1.30724592,
1.567639964, NA, NA, NA, 1.992284554, NA, NA, NA), F2_W1 = c(NA,
NA, NA, NA, NA, 1.29458793, NA, NA, NA, NA, NA, NA, NA, 1.486758161,
NA, 1.401711351, NA, NA, NA, 1.607233531, 1.294592635, NA, NA,
NA, NA, NA, 1.322557962, 1.901422955, 1.499458299, NA, 1.564013519,
1.290805698, 1.539214595, NA, NA, NA, 2.028195356, NA, NA, NA
), F2_W5 = c(1.28880052, NA, NA, NA, NA, 1.336973507, NA, 1.59206685,
NA, NA, NA, NA, NA, 1.393368796, NA, 1.356932545, NA, NA, NA,
1.664091839, 1.319554243, NA, NA, 1.90249252, NA, 1.460583211,
NA, NA, 1.483530845, 1.84084724, NA, 1.266968962, 1.505185519,
NA, NA, NA, NA, NA, 1.910838936, NA), F2_W9 = c(1.332999457,
NA, NA, NA, NA, 1.457267861, NA, NA, NA, NA, NA, NA, NA, 1.420406792,
1.320894906, 1.391459169, NA, NA, NA, NA, 1.335632046, NA, 1.424633183,
NA, NA, 1.467406456, NA, NA, NA, 1.643685477, NA, 1.257788913,
1.531570478, NA, NA, 1.429859926, NA, NA, 1.915235966, NA), F2_W12 = c(1.441020711,
NA, NA, NA, NA, 1.495250201, NA, NA, NA, NA, NA, NA, NA, 1.56312457,
1.397228445, 1.460322737, NA, NA, NA, NA, 1.375757268, NA, NA,
NA, NA, 1.456947025, NA, NA, NA, NA, NA, 1.296279009, NA, NA,
NA, 1.495215947, NA, NA, 1.867096117, NA), F3_T0 = c(NA, NA,
NA, NA, NA, 1.429323128, NA, NA, NA, NA, NA, NA, NA, NA, NA,
1.455722203, NA, NA, NA, 1.718003747, 1.392588831, NA, NA, NA,
NA, NA, 1.356220502, NA, 1.565199789, NA, 1.55575983, 1.345545624,
1.500451591, NA, 1.500199495, NA, NA, NA, NA, NA), F3_W1 = c(NA,
NA, NA, NA, NA, 1.320879332, NA, NA, NA, NA, NA, NA, NA, 1.482776758,
NA, 1.388997787, NA, NA, NA, 1.673917499, 1.368638525, NA, NA,
NA, NA, NA, 1.349849673, NA, 1.474563588, NA, 1.540069991, 1.312563143,
1.449777629, NA, 1.454260755, NA, NA, NA, NA, NA), F3_W5 = c(1.408800752,
NA, NA, NA, NA, 1.393866166, NA, 1.611592149, NA, NA, NA, NA,
NA, 1.499389718, NA, 1.514390464, NA, NA, NA, 1.789185049, 1.45166967,
NA, NA, 1.846304354, NA, 1.610155551, NA, NA, 1.547560239, NA,
1.652475407, 1.44206416, 1.550771289, NA, NA, NA, NA, NA, NA,
NA), F3_W9 = c(1.31765229, 1.537537472, NA, NA, NA, NA, 1.743853171,
1.604897968, NA, NA, NA, NA, NA, 1.57755427, NA, 1.485500654,
1.552382789, NA, NA, 1.781759095, 1.381713893, NA, NA, NA, NA,
1.56779468, NA, NA, 1.640680681, 1.875227459, 1.713920833, 1.368358762,
1.557888238, NA, NA, NA, NA, NA, 1.880423281, NA), F3_W12 = c(1.265383685,
1.421076492, NA, NA, NA, NA, NA, 1.751081853, NA, NA, NA, NA,
NA, NA, NA, 1.444736886, NA, NA, NA, 1.676557286, 1.36341842,
NA, NA, NA, NA, 1.520168447, NA, NA, NA, 1.932829896, 1.629893889,
1.353767174, 1.444430709, 1.387145142, NA, NA, NA, NA, NA, NA
)), class = "data.frame", row.names = c(NA, -40L))
I would like to make a jittered boxplot figure from this data, where the x-axis is ordered by "Phyla" and the y-axis shows the distribution of values for each phylum as boxplots with jittered points, and where the point representing the average of "Methanothrix soehngenii" is labelled with its name.
Hi, could you please edit your post and provide the example data via `dput` rather than as plain text? Plain text requires the reader to type everything out by hand, whereas `dput` output can simply be copied and pasted directly into R.

Hi, I tried the "dput" function, and its output has now been inserted in place of the plain text.