我有以下数据框:
library(tidyverse)
# Example data: a 32-row tibble with two numeric feature columns
# (charge.Group3, hydrophobicity.Group3) and a character `class` column
# whose values are "Negative" or "Positive".
dat <- structure(list(charge.Group3 = c(0.167, 0.167, 0.1, 0.067, 0.033,
0.033, 0.067, 0.133, 0.2, 0.067, 0.133, 0.114, 0.167, 0.033,
0.1, 0.033, 0.133, 0.267, 0.133, 0.233, 0.1, 0.167, 0.067, 0.133,
0.1, 0.133, 0.1, 0.133, 0.1, 0.067, 0.167, 0), hydrophobicity.Group3 = c(0.267,
0.467, 0.067, 0.167, 0.267, 0.1, 0.367, 0.233, 0.367, 0.233,
0.133, 0.205, 0.333, 0.267, 0.267, 0.067, 0.133, 0.3, 0.233,
0.267, 0.5, 0.333, 0.2, 0.5, 0.5, 0.4, 0.033, 0.3, 0.233, 0.5,
0.233, 0.033), class = c("Negative", "Negative", "Positive",
"Positive", "Positive", "Positive", "Positive", "Negative", "Positive",
"Positive", "Positive", "Positive", "Positive", "Positive", "Negative",
"Positive", "Negative", "Negative", "Negative", "Negative", "Negative",
"Negative", "Negative", "Negative", "Negative", "Negative", "Positive",
"Positive", "Positive", "Negative", "Positive", "Negative")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -32L))
# Print the tibble to inspect it.
dat
#> # A tibble: 32 x 3
#> charge.Group3 hydrophobicity.Group3 class
#> <dbl> <dbl> <chr>
#> 1 0.167 0.267 Negative
#> 2 0.167 0.467 Negative
#> 3 0.1 0.067 Positive
#> 4 0.067 0.167 Positive
#> 5 0.033 0.267 Positive
#> 6 0.033 0.1 Positive
#> 7 0.067 0.367 Positive
#> 8 0.133 0.233 Negative
#> 9 0.2 0.367 Positive
#> 10 0.067 0.233 Positive
#> # ... with 22 more rows
我想做的是:对每个特征(charge.Group3 和 hydrophobicity.Group3),在 Negative 类和 Positive 类之间执行 wilcox.test,最后把 p 值整理成数据框或 tibble:
features pvalue
charge.Group3 0.1088
hydrophobicity.Group3 0.03895
# I do by hand.
请注意,实际数据中的特征不止 2 个。我怎样才能做到这一点?
如果你只需要检验的 p 值,其实不需要使用 broom。
library(tidyverse)
# Compute one Wilcoxon rank-sum p-value per feature column, comparing the
# values between the levels of `class`.
dat %>%
  # Reshape to long format: one row per (class, feature name, value).
  # pivot_longer() replaces the superseded gather().
  pivot_longer(-class, names_to = "group", values_to = "value") %>%
  # One nested data frame per feature. Naming the list-column explicitly
  # avoids the deprecation warning that an unnamed nest(-group) triggers
  # in tidyr >= 1.0.
  nest(data = -group) %>%
  # Run the test on each nested data frame and keep only the p-value.
  mutate(
    pval = map_dbl(data, ~ wilcox.test(value ~ class, data = .x)$p.value)
  ) %>%
  # Drop the nested data, leaving only `group` and `pval`.
  select(-data)
# # A tibble: 2 x 2
# group pval
# <chr> <dbl>
# 1 charge.Group3 0.109
# 2 hydrophobicity.Group3 0.0390
先把数据重塑为长格式,这样无论有多少列都可以套用这一流程(前提是 class 是唯一的额外变量)。
或者,您甚至可以按照 @Moody_Mudskipper 的建议,完全不使用 map:
# Same per-feature Wilcoxon p-values without nesting or map(): group the
# long-format data by feature name and summarise each group directly.
dat %>%
  # Long format: one row per (class, feature name, value).
  # pivot_longer() replaces the superseded gather().
  pivot_longer(-class, names_to = "group", values_to = "value") %>%
  group_by(group) %>%
  # Within each group, `value` and `class` refer to that feature's rows.
  summarize(results = wilcox.test(value ~ class)$p.value)
如果你确实想使用 broom,可以这样做:
library(broom)
# Per-feature Wilcoxon test with the full tidied result (statistic,
# p.value, method, alternative) obtained through broom::tidy().
dat %>%
  # Long format: one row per (class, feature name, value).
  # pivot_longer() replaces the superseded gather().
  pivot_longer(-class, names_to = "group", values_to = "value") %>%
  # Name the list-column explicitly; an unnamed nest(-group) triggers a
  # deprecation warning in tidyr >= 1.0.
  nest(data = -group) %>%
  # tidy() turns each test object into a one-row tibble.
  mutate(
    results = map(data, ~ tidy(wilcox.test(value ~ class, data = .x)))
  ) %>%
  select(-data) %>%
  # Expand the one-row tibbles into regular columns.
  unnest(results)
# # A tibble: 2 x 5
# group statistic p.value method alternative
# <chr> <dbl> <dbl> <chr> <chr>
# 1 charge.Group3 170. 0.109 Wilcoxon rank sum test with continuity correction two.sided
# 2 hydrophobicity.Group3 183 0.0390 Wilcoxon rank sum test with continuity correction two.sided
这会返回更多的列;如果需要,您可以只保留 p 值这一列。
下面是一种使用 dplyr::summarize_at 和 tidyr::gather 的方法:
library(tidyverse)
# Wilcoxon p-value for each explicitly listed feature column, returned as
# a long (features, pvalue) table.
dat %>%
  summarize_at(
    vars(charge.Group3, hydrophobicity.Group3),
    # `feature` is the current column; `grp` receives the extra argument
    # supplied below (the class labels).
    function(feature, grp) wilcox.test(feature ~ grp)$p.value,
    .$class
  ) %>%
  # Turn the one-row wide result into one row per feature.
  gather(key = features, value = pvalue)
# # A tibble: 2 x 2
# features pvalue
# <chr> <dbl>
# 1 charge.Group3 0.109
# 2 hydrophobicity.Group3 0.039
对除 class 之外的所有变量进行汇总:
# Wilcoxon p-value for every column except `class`, returned as a long
# (features, pvalue) table.
dat %>%
  summarize_at(
    vars(-class),
    # `feature` is the current column; `grp` receives the extra argument
    # supplied below (the class labels).
    function(feature, grp) wilcox.test(feature ~ grp)$p.value,
    .$class
  ) %>%
  # Turn the one-row wide result into one row per feature.
  gather(key = features, value = pvalue)