I need to split my data frame by group and then extract the three smallest values from particular columns.
For example, with two groups of data the input data frame (`total_pcv`) looks like this:
structure(list(Type = c("knn_vsn", "knn_vsn", "knn_loess", "knn_loess",
"knn_rlr", "knn_rlr", "lls_vsn", "lls_vsn", "lls_loess", "lls_loess",
"lls_rlr", "lls_rlr", "svd_vsn", "svd_vsn", "svd_loess", "svd_loess",
"svd_rlr", "svd_rlr"), PCV = c(0.00510741446572374, 0.00705765780896556,
0.00509233659481246, 0.00696732302441824, 0.00509225712407119,
0.00696173227550932, 0.00492983133396127, 0.00669466376079551,
0.00491874477556813, 0.0066283342182998, 0.00493450413250135,
0.00663684901164831, 0.00731828997356189, 0.0106867134410024,
0.00729635842702563, 0.0105680795904369, 0.00730343601772899,
0.0105334181341163)), class = "data.frame", row.names = c(NA,
-18L))
I then split the data frame using the following code:
# slice_min() and %>% are provided by dplyr.
library(dplyr)

# The rows are assumed to alternate between the two groups, so the parity of
# the row index selects the group: odd rows -> Group1, even rows -> Group2.
row_odd <- seq_len(nrow(total_pcv)) %% 2
data_row_odd <- total_pcv[row_odd == 1, ]
data_row_even <- total_pcv[row_odd == 0, ]
# Place the groups side by side: one (Type, PCV) column pair per group.
total_pcv_all <- cbind(data_row_odd, data_row_even)
# Number the PCV columns so each has a unique name slice_min() can refer to.
colnames(total_pcv_all) <- c("Group1", "PCV1", "Group2", "PCV2")
rownames(total_pcv_all) <- NULL
#Extracting top 3 minimum values in particular column
# With two groups only PCV1 and PCV2 exist. The first slice_min() keeps the
# three rows with the smallest PCV1; the second re-orders those rows by PCV2.
total_pcv_all <- total_pcv_all %>%
  slice_min(PCV1, n = 3) %>%
  slice_min(PCV2, n = 3)
The output looks like this:
structure(list(Group1 = c("lls_loess", "lls_rlr", "lls_vsn"),
PCV1 = c(0.00491874477556813, 0.00493450413250135, 0.00492983133396127
), Group2 = c("lls_loess", "lls_rlr", "lls_vsn"), PCV2 = c(0.0066283342182998,
0.00663684901164831, 0.00669466376079551)), class = "data.frame", row.names = c(NA,
-3L))
This works for two groups of data.
Now suppose the number of groups increases beyond two. How can I modify the code above, or is there a better way to solve this problem?
For example, here is a data frame with four groups:
structure(list(Type = c("knn_vsn", "knn_vsn", "knn_vsn", "knn_vsn",
"knn_loess", "knn_loess", "knn_loess", "knn_loess", "knn_rlr",
"knn_rlr", "knn_rlr", "knn_rlr", "lls_vsn", "lls_vsn", "lls_vsn",
"lls_vsn", "lls_loess", "lls_loess", "lls_loess", "lls_loess",
"lls_rlr", "lls_rlr", "lls_rlr", "lls_rlr", "svd_vsn", "svd_vsn",
"svd_vsn", "svd_vsn", "svd_loess", "svd_loess", "svd_loess",
"svd_loess", "svd_rlr", "svd_rlr", "svd_rlr", "svd_rlr"), PCV = c(0.00318368971435714,
0.0056588221783197, 0.00418838138878096, 0.0039811913527127,
0.00317086486813191, 0.00560933517836751, 0.00417201215938804,
0.00394649435912413, 0.00317086486813191, 0.00560933517836751,
0.00417201215938804, 0.00394649435912413, 0.00312821095645019,
0.00550114679857588, 0.00398819978362592, 0.00397059873107098,
0.00311632537571597, 0.00548316209864631, 0.00397093259462351,
0.00393840233766712, 0.00313568333628438, 0.00550230673346083,
0.00398827962107259, 0.00396385071387178, 0.00394831935666465,
0.00737865310351839, 0.00424157479553304, 0.0041077267588457,
0.00393605637633005, 0.0073411154394253, 0.00422638750183658,
0.00407577176849463, 0.00395599132474446, 0.00735748595511963,
0.00424175886713471, 0.00410191492380459)), class = "data.frame", row.names = c(NA,
-36L))
For this case I can modify the code above as follows:
# slice_min() and %>% are provided by dplyr.
library(dplyr)

# The rows are assumed to cycle through the four groups, so the row index
# modulo 4 selects the group: remainders 1, 2, 3, 0 -> positions 1, 2, 3, 4.
row_odd <- seq_len(nrow(total_pcv)) %% 4
data_row_odd0 <- total_pcv[row_odd == 1, ]
data_row_odd1 <- total_pcv[row_odd == 3, ]
data_row_even0 <- total_pcv[row_odd == 2, ]
data_row_even1 <- total_pcv[row_odd == 0, ]
# Bind in positional order (1st, 2nd, 3rd, 4th row of each cycle) so that the
# Group1..Group4 / PCV1..PCV4 labels below match the data they describe.
total_pcv_all <- cbind(
  data_row_odd0, data_row_even0, data_row_odd1, data_row_even1
)
colnames(total_pcv_all) <- c(
  "Group1", "PCV1", "Group2", "PCV2", "Group3", "PCV3", "Group4", "PCV4"
)
rownames(total_pcv_all) <- NULL
# The first slice_min() keeps the three rows with the smallest PCV1; each
# later call re-orders those rows by the next PCV column.
total_pcv_all <- total_pcv_all %>%
  slice_min(PCV1, n = 3) %>%
  slice_min(PCV2, n = 3) %>%
  slice_min(PCV3, n = 3) %>%
  slice_min(PCV4, n = 3)
The output looks like this:
structure(list(Group1 = c("lls_loess", "lls_rlr", "lls_vsn"),
PCV1 = c(0.00311632537571597, 0.00313568333628438, 0.00312821095645019
), Group2 = c("lls_loess", "lls_rlr", "lls_vsn"), PCV2 = c(0.00548316209864631,
0.00550230673346083, 0.00550114679857588), Group3 = c("lls_loess",
"lls_rlr", "lls_vsn"), PCV3 = c(0.00397093259462351, 0.00398827962107259,
0.00398819978362592), Group4 = c("lls_loess", "lls_rlr",
"lls_vsn"), PCV4 = c(0.00393840233766712, 0.00396385071387178,
0.00397059873107098)), class = "data.frame", row.names = c(NA,
-3L))
Please suggest code that automates this operation for an arbitrary number of groups (n).
Is there any biological context to this question?
Yes. It is related to proteomics expression data analysis.