如何在R中提取数据框中所有列的最小值?



我正在研究质谱蛋白质组学表达数据。为了对数据进行统计分析,我需要找出数据框中每一列最小的三个值。数据框如下所示:

# Input data: a tibble with a character column `Type` and four numeric
# columns `Group1`..`Group4` (9 rows). Its class vector includes
# "grouped_df", and the `groups` attribute groups the rows by `Type`
# (one row per group).
structure(list(Type = c("knn_vsn", "knn_loess", "knn_rlr", "lls_vsn", 
"lls_loess", "lls_rlr", "svd_vsn", "svd_loess", "svd_rlr"), Group1 = c(0.00318368971435714, 
0.00317086486813191, 0.00317086486813191, 0.00312821095645019, 
0.00311632537571597, 0.00313568333628438, 0.00394831935666465, 
0.00393605637633005, 0.00395599132474446), Group2 = c(0.0056588221783197, 
0.00560933517836751, 0.00560933517836751, 0.00550114679857588, 
0.00548316209864631, 0.00550230673346083, 0.00737865310351839, 
0.0073411154394253, 0.00735748595511963), Group3 = c(0.00418838138878096, 
0.00417201215938804, 0.00417201215938804, 0.00398819978362592, 
0.00397093259462351, 0.00398827962107259, 0.00424157479553304, 
0.00422638750183658, 0.00424175886713471), Group4 = c(0.0039811913527127, 
0.00394649435912413, 0.00394649435912413, 0.00397059873107098, 
0.00393840233766712, 0.00396385071387178, 0.0041077267588457, 
0.00407577176849463, 0.00410191492380459)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -9L), groups = structure(list(
Type = c("knn_loess", "knn_rlr", "knn_vsn", "lls_loess", 
"lls_rlr", "lls_vsn", "svd_loess", "svd_rlr", "svd_vsn"), 
.rows = structure(list(2L, 3L, 1L, 5L, 6L, 4L, 8L, 9L, 7L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -9L), .drop = TRUE))

我需要得到如下输出:

# Desired output: a plain data.frame with 3 rows that places a Type column
# next to each of Group1..Group4. The four Type columns are named "Type ",
# " Type", "  Type" and "Type  " -- they differ only in space padding,
# presumably to keep the column names distinct.
structure(list(`Type ` = c("lls_loess", "lls_rlr", "lls_vsn"), 
Group1 = c(0.00311632537571597, 0.00313568333628438, 0.00312821095645019
), ` Type` = c("lls_loess", "lls_rlr", "lls_vsn"), Group2 = c(0.00548316209864631, 
0.00550230673346083, 0.00550114679857588), `  Type` = c("lls_loess", 
"lls_rlr", "lls_vsn"), Group3 = c(0.00397093259462351, 0.00398827962107259, 
0.00398819978362592), `Type  ` = c("lls_loess", "lls_rlr", 
"lls_vsn"), Group4 = c(0.00393840233766712, 0.00396385071387178, 
0.00397059873107098)), class = "data.frame", row.names = c(NA, 
-3L))

请为这个问题提供一些有用的R代码。提前谢谢你。

library(tidyverse)
# For every Group column, keep the three smallest values together with their
# Type labels, then spread the result into side-by-side Type/value pairs
# (one pair of columns per Group, one row per rank).
df %>%
  pivot_longer(-Type) %>%
  group_by(name) %>%
  # with_ties = FALSE guarantees exactly three rows per column. The default
  # (with_ties = TRUE) returns extra rows when values tie at the cut-off,
  # which would add rows padded with NA to the wide result.
  slice_min(value, n = 3, with_ties = FALSE) %>% # You might stop here, already tidy
  # Rank within each column; used as the row key when widening.
  mutate(row = row_number()) %>%
  ungroup() %>%
  pivot_wider(
    names_from = name,
    values_from = c(Type, value),
    # "slowest" keeps each Type_*/value_* pair adjacent in the output.
    names_vary = "slowest"
  )

结果

# A tibble: 3 × 9
row Type_Group1 value_Group1 Type_Group2 value_Group2 Type_Group3 value_Group3 Type_Group4 value_Group4
<int> <chr>              <dbl> <chr>              <dbl> <chr>              <dbl> <chr>              <dbl>
1     1 lls_loess        0.00312 lls_loess        0.00548 lls_loess        0.00397 lls_loess        0.00394
2     2 lls_vsn          0.00313 lls_vsn          0.00550 lls_vsn          0.00399 knn_loess        0.00395
3     3 lls_rlr          0.00314 lls_rlr          0.00550 lls_rlr          0.00399 knn_rlr          0.00395

看看这个

# Example data frame: a grouping column (letters a-c, five rows each) and an
# integer value column; 1:3 is recycled to fill all 15 rows.
data <- data.frame(group = rep(letters[1:3], each = 5), value = 1:3)
data

另一种可能的解决方案,基于purrr::imap_dfc:

library(tidyverse)
# Positions of the value columns (everything after the first column, `Type`).
# seq_len() avoids the 2:1 countdown that 2:ncol(df) produces when the data
# frame has only one column.
value_cols <- seq_len(ncol(df))[-1]

# For each value column, take `Type` plus that column, keep the three rows
# with the smallest values, and rename the pair to Type<i> / <column name>.
# imap_dfc() then binds the per-column results side by side.
imap_dfc(value_cols, function(col, i) {
  value_name <- names(df)[col]
  df %>%
    ungroup() %>% # the input is grouped by Type; slice over the whole table
    select(all_of(c(1L, col))) %>%
    # with_ties = FALSE keeps exactly three rows per column; otherwise a tie
    # at the cut-off yields unequal row counts and the column-bind fails.
    slice_min(.data[[value_name]], n = 3, with_ties = FALSE) %>%
    set_names(c(paste0("Type", i), value_name))
})
#> # A tibble: 3 × 8
#>   Type1      Group1 Type2      Group2 Type3      Group3 Type4      Group4
#>   <chr>       <dbl> <chr>       <dbl> <chr>       <dbl> <chr>       <dbl>
#> 1 lls_loess 0.00312 lls_loess 0.00548 lls_loess 0.00397 lls_loess 0.00394
#> 2 lls_vsn   0.00313 lls_vsn   0.00550 lls_vsn   0.00399 knn_loess 0.00395
#> 3 lls_rlr   0.00314 lls_rlr   0.00550 lls_rlr   0.00399 knn_rlr   0.00395

注意：你的原始数据是分组的(grouped_df),这就是我在解决方案中使用 ungroup 的原因。

最新更新