我一直在尝试对我的庞大数据集执行Wilcoxon Signed Rank测试。下面是我的一个代表性的示例
sample Abiotrophia Abyssicoccus Acaryochloris yield day season
R11P4_BS 0.454828660436137 8.71569632259294 0 low 60 2
R13P1_BS 0.013239875389408 10.8649859288577 0.147574819401445 high 60 1
R13P3_BS 0 5.13545281606475 0.386996904024768 low 30 1
我想比较并获得我的样本组的p值(例如:产量高和低),每个细菌(Abyssicoccus albus, Acaryochloris marina, Acetilactobacillus jinshanensis)基于Wilcoxon符号秩检验。最终目标是确定哪些细菌在"高"one_answers"低"方面有显著差异。
> dput(head(dat))
structure(list(Abiotrophia = c(0, 3.21408045977011, 0.117816091954023,
0, 0.002873563218391, 0), Abyssicoccus = c(0.454828660436137,
0.013239875389408, 0, 0, 0, 0.007009345794393), Acaryochloris = c(8.71569632259294,
10.8649859288577, 5.13545281606475, 5.72940386089162, 0.888392623745432,
3.93946335292641), day = c(0L, 60L, 60L, 60L, 90L, 90L), yield = c("low",
"high", "high", "low", "high", "high"), season = c(1L, 1L,
1L, 1L, 1L, 1L)), row.names = c("R11P4_BS", "R13P1_BS", "R13P3_BS",
"R13P6_BS", "R14P1_BS", "R14P3_BS"), class = "data.frame")
这就是我到目前为止所做的,我相信这显示了第7行数据的p值
wilcox.test(data[data$yield == "low", 7],
data[data$yield == "high", 7], exact=FALSE)$p.val
0.09657031 [1]和下面的代码给了我错误:
sapply(2:ncol(data),
function(x) {
wilcox.test(data[data$yield == "low", 7],
data[data$yield == "high", 7], exact=FALSE)$p.val
}
)
当你说" huge "时,如果你的意思是你有很多行,也许这种方法更合适:
library(tidyverse)
df <- structure(list(Abiotrophia = c(0, 3.21408045977011, 0.117816091954023,
0.1, 0.002873563218391, 0.001), Abyssicoccus = c(0.454828660436137,
0.013239875389408, 0, 0.1, 0.1, 0.007009345794393), Acaryochloris = c(8.71569632259294,
10.8649859288577, 5.13545281606475, 5.72940386089162, 0.888392623745432,
3.93946335292641), day = c(0L, 60L, 60L, 60L, 90L, 90L), yield = c("low",
"high", "high", "low", "high", "low"), season = c(1L, 1L,
1L, 1L, 1L, 1L)), row.names = c("R11P4_BS", "R13P1_BS", "R13P3_BS",
"R13P6_BS", "R14P1_BS", "R14P3_BS"), class = "data.frame")
wilcox_pvalues <- function(dataset, species) {
tmp <- dataset %>%
select({{species}}, yield) %>%
split(.$yield)
stack(Map(function(x, y) wilcox.test(x, y, exact = FALSE)$p.value, tmp[[1]][1], tmp[[2]][1]))
}
wilcox_pvalues(df, Abiotrophia)
#> values ind
#> 1 0.1904303 Abiotrophia
wilcox_pvalues(df, Abyssicoccus)
#> values ind
#> 1 0.5065552 Abyssicoccus
wilcox_pvalues(df, Acaryochloris)
#> values ind
#> 1 1 Acaryochloris
创建于2023-04-05 with reprex v2.0.2
如果你有很多物种,这里有一个不同的选择:
library(tidyverse)
# pivot to 'long' format and drop columns
df2 <- df %>%
pivot_longer(-c(day, yield, season)) %>%
select(-c(day, season))
df2
#> # A tibble: 18 × 3
#> yield name value
#> <chr> <chr> <dbl>
#> 1 low Abiotrophia 0
#> 2 low Abyssicoccus 0.455
#> 3 low Acaryochloris 8.72
#> 4 high Abiotrophia 3.21
#> 5 high Abyssicoccus 0.0132
#> 6 high Acaryochloris 10.9
#> 7 high Abiotrophia 0.118
#> 8 high Abyssicoccus 0
#> 9 high Acaryochloris 5.14
#> 10 low Abiotrophia 0.1
#> 11 low Abyssicoccus 0.1
#> 12 low Acaryochloris 5.73
#> 13 high Abiotrophia 0.00287
#> 14 high Abyssicoccus 0.1
#> 15 high Acaryochloris 0.888
#> 16 low Abiotrophia 0.001
#> 17 low Abyssicoccus 0.00701
#> 18 low Acaryochloris 3.94
# calculate wilcox p-values
df2 %>%
group_by(name) %>%
summarise(pval = wilcox.test(value~yield, paired=FALSE, exact = FALSE)$p.value)
#> # A tibble: 3 × 2
#> name pval
#> <chr> <dbl>
#> 1 Abiotrophia 0.190
#> 2 Abyssicoccus 0.507
#> 3 Acaryochloris 1
创建于2023-04-05 with reprex v2.0.2