Entering edit mode
2.4 years ago
KABILAN
▴
130
I have a dataset like below,
df1<-structure(list(Uniprot_IDs = c("P0A799|PGK", "P0A853|TNAA", "P0CE47|EFTU1",
"P0A6F3|GLPK", "P0A6F5|CH60", "P0A9B2|G3P1", "P0A853|TNAA", "P0A6P1|EFTS",
"P0A6P1|EFTS", "P0A799|PGK"), `1_3ng` = c(12305960196.5721, 24169710612.0476,
NA, 8553811608.70032, 13176265141.6301, 92994780469.5607, 11373139178.993,
NA, 8062061247.94512, 3484150815.20598), `2_3ng` = c(11629654800,
25162283400, 31864546300, 8157173240, 12812379370, 90007498700,
10191440110, NA, 7911370530, 3406054010), `3_3ng` = c(12503938417.8663,
25733015601.0117, 34727094361.2997, 8857104380.18179, NA, 93988723611.341,
11653192532.4546, NA, 7933102839.01341, NA), `4_7-5ng` = c(NA,
79582218995.1549, 77615759060.3497, 21749287984.8341, 33342436650.5148,
101254055758.836, 30624750667.6451, 39438567251.7351, 10726988796.4798,
7850501475.22747), `5_7-5ng` = c(NA, 78743355495.2545, 81948536416.9992,
NA, 34617564902.3219, 99485017820.8478, NA, 40420212151.9563,
14804870783.7792, 8280398872.03417), `6_7-5ng` = c(NA, 80272416055.8845,
77019098847.8474, 23045479130.9574, 32885483296.8046, 90789109337.1181,
30678346321.0037, 37073444001.0421, 13710097518.7425, 7916821420.64152
), `7_10ng` = c(22617928037.5148, 97473230025.8853, 91579176089.4265,
28086665669.9634, 38033883000.8102, NA, 37181868033.5073, 44274304023.6936,
NA, 9288965106.5049), `8_10ng` = c(22091136513.3736, NA, 90754802145.7813,
26405368418.6503, 36442770423.3661, NA, 36789459227.7515, 42793252584.0984,
15307787846.1716, 8834742124.86943), `9_10ng` = c(24125219176.3177,
98420360686.1339, 99355131865.2305, 28271975548.9608, 39837381317.8216,
NA, 39481996086.9157, 47261977623.5276, 16463020175.2068, 9931809132.696
), `10_15ng` = c(30252776887.1842, 141726904178.35, 130889671408.26,
38206477283.6549, 56021084469.4745, 100336249543.662, 53295491175.4506,
62883519160.5278, NA, 13994955303.4972), `11_15ng` = c(32859283128.8916,
161633827056.573, NA, 45497410866.4248, 61586094337.2513, NA,
60508117975.6097, 73276218943.4545, NA, 15400735421.5), `12_15ng` = c(34372085877.8071,
165557046117.222, 153975644961.53, 46279635074.4959, 61867667358.3367,
106133922907.254, 63526552497.161, 76374667334.5682, NA, 15329671283.3959
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-10L))
And a group data,
groups <- structure(list(samples = c("1_3ng", "2_3ng", "3_3ng", "4_7-5ng",
"5_7-5ng", "6_7-5ng", "7_10ng", "8_10ng", "9_10ng", "10_15ng",
"11_15ng", "12_15ng"), groups = c("GrA", "GrA", "GrA", "GrB",
"GrB", "GrB", "GrC", "GrC", "GrC", "GrD", "GrD", "GrD")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -12L))
And I have used the following codes for removing the groupwise missing rows from the dataset.
new_colnames <- c("name", "group")
colnames(groups)<-new_colnames
x <- colnames(df1[,1])
df2 <- setNames(df1,replace(names(df1),names(df1)==x,"rowid"))
com_data <- df2 %>%
pivot_longer(!rowid, values_to="mass") %>%
inner_join(groups, by="name") %>%
group_by(name) %>%
filter(!all(is.na(mass))) %>%
ungroup() %>%
add_count(rowid) %>%
filter(n == max(n)) %>%
select(!c(group, n)) %>%
pivot_wider(names_from=name, values_from=mass)
But I am not getting the desired output, it was like below,
structure(list(rowid = c("P0A799|PGK", "P0A853|TNAA", "P0A6P1|EFTS"
), `1_3ng` = list(c(12305960196.5721, 3484150815.20598), c(24169710612.0476,
11373139178.993), c(NA, 8062061247.94512)), `2_3ng` = list(c(11629654800,
3406054010), c(25162283400, 10191440110), c(NA, 7911370530)),
`3_3ng` = list(c(12503938417.8663, NA), c(25733015601.0117,
11653192532.4546), c(NA, 7933102839.01341)), `4_7-5ng` = list(
c(NA, 7850501475.22747), c(79582218995.1549, 30624750667.6451
), c(39438567251.7351, 10726988796.4798)), `5_7-5ng` = list(
c(NA, 8280398872.03417), c(78743355495.2545, NA), c(40420212151.9563,
14804870783.7792)), `6_7-5ng` = list(c(NA, 7916821420.64152
), c(80272416055.8845, 30678346321.0037), c(37073444001.0421,
13710097518.7425)), `7_10ng` = list(c(22617928037.5148, 9288965106.5049
), c(97473230025.8853, 37181868033.5073), c(44274304023.6936,
NA)), `8_10ng` = list(c(22091136513.3736, 8834742124.86943
), c(NA, 36789459227.7515), c(42793252584.0984, 15307787846.1716
)), `9_10ng` = list(c(24125219176.3177, 9931809132.696),
c(98420360686.1339, 39481996086.9157), c(47261977623.5276,
16463020175.2068)), `10_15ng` = list(c(30252776887.1842,
13994955303.4972), c(141726904178.35, 53295491175.4506),
c(62883519160.5278, NA)), `11_15ng` = list(c(32859283128.8916,
15400735421.5), c(161633827056.573, 60508117975.6097), c(73276218943.4545,
NA)), `12_15ng` = list(c(34372085877.8071, 15329671283.3959
), c(165557046117.222, 63526552497.161), c(76374667334.5682,
NA))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-3L))
And the warning message was,
Warning message:
Values from `mass` are not uniquely identified; output will contain list-cols.
* Use `values_fn = list` to suppress this warning.
* Use `values_fn = {summary_fun}` to summarise duplicates.
* Use the following dplyr code to identify duplicates.
{data} %>%
dplyr::group_by(rowid, name) %>%
dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
dplyr::filter(n > 1L)
How to get the dataframe output which contain single values in each column instead of list-cols.
How would you like to deal with duplicate values ?