R pivot wider error. listcols output error
0
0
Entering edit mode
2.4 years ago
KABILAN ▴ 130

I have a dataset like below,

df1<-structure(list(Uniprot_IDs = c("P0A799|PGK", "P0A853|TNAA", "P0CE47|EFTU1", 
"P0A6F3|GLPK", "P0A6F5|CH60", "P0A9B2|G3P1", "P0A853|TNAA", "P0A6P1|EFTS", 
"P0A6P1|EFTS", "P0A799|PGK"), `1_3ng` = c(12305960196.5721, 24169710612.0476, 
NA, 8553811608.70032, 13176265141.6301, 92994780469.5607, 11373139178.993, 
NA, 8062061247.94512, 3484150815.20598), `2_3ng` = c(11629654800, 
25162283400, 31864546300, 8157173240, 12812379370, 90007498700, 
10191440110, NA, 7911370530, 3406054010), `3_3ng` = c(12503938417.8663, 
25733015601.0117, 34727094361.2997, 8857104380.18179, NA, 93988723611.341, 
11653192532.4546, NA, 7933102839.01341, NA), `4_7-5ng` = c(NA, 
79582218995.1549, 77615759060.3497, 21749287984.8341, 33342436650.5148, 
101254055758.836, 30624750667.6451, 39438567251.7351, 10726988796.4798, 
7850501475.22747), `5_7-5ng` = c(NA, 78743355495.2545, 81948536416.9992, 
NA, 34617564902.3219, 99485017820.8478, NA, 40420212151.9563, 
14804870783.7792, 8280398872.03417), `6_7-5ng` = c(NA, 80272416055.8845, 
77019098847.8474, 23045479130.9574, 32885483296.8046, 90789109337.1181, 
30678346321.0037, 37073444001.0421, 13710097518.7425, 7916821420.64152
), `7_10ng` = c(22617928037.5148, 97473230025.8853, 91579176089.4265, 
28086665669.9634, 38033883000.8102, NA, 37181868033.5073, 44274304023.6936, 
NA, 9288965106.5049), `8_10ng` = c(22091136513.3736, NA, 90754802145.7813, 
26405368418.6503, 36442770423.3661, NA, 36789459227.7515, 42793252584.0984, 
15307787846.1716, 8834742124.86943), `9_10ng` = c(24125219176.3177, 
98420360686.1339, 99355131865.2305, 28271975548.9608, 39837381317.8216, 
NA, 39481996086.9157, 47261977623.5276, 16463020175.2068, 9931809132.696
), `10_15ng` = c(30252776887.1842, 141726904178.35, 130889671408.26, 
38206477283.6549, 56021084469.4745, 100336249543.662, 53295491175.4506, 
62883519160.5278, NA, 13994955303.4972), `11_15ng` = c(32859283128.8916, 
161633827056.573, NA, 45497410866.4248, 61586094337.2513, NA, 
60508117975.6097, 73276218943.4545, NA, 15400735421.5), `12_15ng` = c(34372085877.8071, 
165557046117.222, 153975644961.53, 46279635074.4959, 61867667358.3367, 
106133922907.254, 63526552497.161, 76374667334.5682, NA, 15329671283.3959
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-10L))

And a group data,

groups <- structure(list(samples = c("1_3ng", "2_3ng", "3_3ng", "4_7-5ng", 
"5_7-5ng", "6_7-5ng", "7_10ng", "8_10ng", "9_10ng", "10_15ng", 
"11_15ng", "12_15ng"), groups = c("GrA", "GrA", "GrA", "GrB", 
"GrB", "GrB", "GrC", "GrC", "GrC", "GrD", "GrD", "GrD")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -12L))

And I have used the following codes for removing the groupwise missing rows from the dataset.

new_colnames <- c("name", "group")
colnames(groups)<-new_colnames

x <- colnames(df1[,1]) 
df2 <- setNames(df1,replace(names(df1),names(df1)==x,"rowid"))

com_data <- df2 %>%
  pivot_longer(!rowid, values_to="mass") %>%
  inner_join(groups, by="name") %>%
  group_by(name) %>%
  filter(!all(is.na(mass))) %>%
  ungroup() %>%
  add_count(rowid) %>%
  filter(n == max(n)) %>%
  select(!c(group, n)) %>%
  pivot_wider(names_from=name, values_from=mass)

But I am not getting the desired output, it was like below,

structure(list(rowid = c("P0A799|PGK", "P0A853|TNAA", "P0A6P1|EFTS"
), `1_3ng` = list(c(12305960196.5721, 3484150815.20598), c(24169710612.0476, 
11373139178.993), c(NA, 8062061247.94512)), `2_3ng` = list(c(11629654800, 
3406054010), c(25162283400, 10191440110), c(NA, 7911370530)), 
    `3_3ng` = list(c(12503938417.8663, NA), c(25733015601.0117, 
    11653192532.4546), c(NA, 7933102839.01341)), `4_7-5ng` = list(
        c(NA, 7850501475.22747), c(79582218995.1549, 30624750667.6451
        ), c(39438567251.7351, 10726988796.4798)), `5_7-5ng` = list(
        c(NA, 8280398872.03417), c(78743355495.2545, NA), c(40420212151.9563, 
        14804870783.7792)), `6_7-5ng` = list(c(NA, 7916821420.64152
    ), c(80272416055.8845, 30678346321.0037), c(37073444001.0421, 
    13710097518.7425)), `7_10ng` = list(c(22617928037.5148, 9288965106.5049
    ), c(97473230025.8853, 37181868033.5073), c(44274304023.6936, 
    NA)), `8_10ng` = list(c(22091136513.3736, 8834742124.86943
    ), c(NA, 36789459227.7515), c(42793252584.0984, 15307787846.1716
    )), `9_10ng` = list(c(24125219176.3177, 9931809132.696), 
        c(98420360686.1339, 39481996086.9157), c(47261977623.5276, 
        16463020175.2068)), `10_15ng` = list(c(30252776887.1842, 
    13994955303.4972), c(141726904178.35, 53295491175.4506), 
        c(62883519160.5278, NA)), `11_15ng` = list(c(32859283128.8916, 
    15400735421.5), c(161633827056.573, 60508117975.6097), c(73276218943.4545, 
    NA)), `12_15ng` = list(c(34372085877.8071, 15329671283.3959
    ), c(165557046117.222, 63526552497.161), c(76374667334.5682, 
    NA))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-3L))

And the warning message was,

Warning message:
Values from `mass` are not uniquely identified; output will contain list-cols.
* Use `values_fn = list` to suppress this warning.
* Use `values_fn = {summary_fun}` to summarise duplicates.
* Use the following dplyr code to identify duplicates.
  {data} %>%
    dplyr::group_by(rowid, name) %>%
    dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
    dplyr::filter(n > 1L)

How to get the dataframe output which contain single values in each column instead of list-cols.

tidyr pivot R dataframe • 852 views
ADD COMMENT
1
Entering edit mode

How would you like to deal with duplicate values ?

ADD REPLY

Login before adding your answer.

Traffic: 2382 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6