我的数据框:
# Example data: ten integer measurement columns (col1..col10) plus an integer
# GROUP id taking the values 1, 2 and 3, with two rows per group.
data <- data.frame(
  col1 = c(125L, 654L, 896L, 154L, 865L, 148L),
  col2 = c(489L, 657L, 198L, 269L, 789L, 456L),
  col3 = c(741L, 852L, 963L, 987L, 951L, 632L),
  col4 = c(124L, 785L, 874L, 965L, 563L, 145L),
  col5 = c(963L, 146L, 259L, 367L, 365L, 189L),
  col6 = c(741L, 777L, 100L, 200L, 956L, 452L),
  col7 = c(456L, 666L, 300L, 778L, 888L, 999L),
  col8 = c(254L, 732L, 400L, 500L, 600L, 700L),
  col9 = c(555L, 638L, 127L, 489L, 545L, 54L),
  col10 = c(921L, 549L, 111L, 222L, 354L, 355L),
  GROUP = c(1L, 2L, 3L, 1L, 2L, 3L)
)
函数:
# Column pairs to compare; each element is c(first_column, second_column).
combination <- list(
  c(1, 2),
  c(3, 4),
  c(5, 6)
)
# Per-group medians plus one Wilcoxon location-shift label per column pair.
#
# Arguments:
#   data        data frame with a GROUP column and numeric measurement columns.
#   id_groups   vector of GROUP values to report (character or numeric).
#   combination non-empty list of length-2 vectors naming the column pairs to
#               compare, given as column positions in `data` or column names.
#
# Returns a data frame of character columns: GROUP, one column per distinct
# column referenced in `combination` (holding the group median), and "dif".
# Each group contributes one row per pair: the first row carries the group id
# and the medians, later rows are NA except for "dif". Each "dif" entry is
# "<i>-<j>: <value>", where <value> is the negated, 2-decimal-rounded
# `estimate` returned by wilcox.test(x_i, x_j, conf.int = TRUE).
#
# Stops with an error when `combination` is empty or a group has no rows.
wilcox.fun <- function(data, id_groups, combination) {
  stopifnot(is.list(combination), length(combination) > 0)

  cols <- unique(unlist(combination))
  # Output names are derived once from `data`, not from a loop-local variable.
  out_names <- c("GROUP", names(data[cols]), "dif")

  result_list <- list()
  for (g in id_groups) {
    # drop = FALSE keeps a matrix even when the group has a single row.
    grp <- as.matrix(data[data$GROUP %in% g, , drop = FALSE])
    if (nrow(grp) == 0L) {
      stop("no rows found for GROUP ", g, call. = FALSE)
    }

    med <- paste(apply(grp[, cols, drop = FALSE], 2, median))

    # Index the full group matrix so that pair positions always refer to the
    # columns of `data`, even when `combination` does not start at column 1.
    dif <- vapply(
      combination,
      function(pair) {
        i <- pair[1]
        j <- pair[2]
        test <- wilcox.test(grp[, i], grp[, j], conf.int = TRUE)
        paste0(i, "-", j, ": ", -round(test$estimate, 2))
      },
      character(1),
      USE.NAMES = FALSE
    )

    result <- data.frame(
      matrix(NA_character_, nrow = length(dif), ncol = length(out_names)),
      stringsAsFactors = FALSE
    )
    result[1, seq_len(length(med) + 1)] <- c(g, med)
    result[[length(out_names)]] <- dif
    names(result) <- out_names
    result_list[[as.character(g)]] <- result
  }

  if (length(result_list) == 0L) {
    empty <- data.frame(
      matrix(character(0), nrow = 0, ncol = length(out_names)),
      stringsAsFactors = FALSE
    )
    names(empty) <- out_names
    return(empty)
  }
  do.call(rbind, result_list)
}
# Run the summary for groups "1" and "2", then display it.
result <- wilcox.fun(
  data = data,
  id_groups = c("1", "2"),
  combination = combination
)
result
我想让 `dif` 列的值按组合并到同一个单元格中:
我想要什么:
组 | col1 | col2 | col3 | col4 | col5 | col6 | 差异
---|---|---|---|---|---|---|---
1 | 139.5 | 379 | 864 | 544.5 | 665 | 470.5 | 1-2: 239.5, 3-4: -319.5, 5-6: -194.5
2 | 759.5 | 723 | 901.5 | 674 | 255.5 | 866.5 | 1-2: -36.5, 3-4: -227.5, 5-6: 611
计算完每个 `k` 对应的 `dif` 之后,使用 `paste()` 把这些值合并到第一行的同一个单元格中,然后去掉多余的行。
# Summarise each requested group as a single row: per-column medians plus all
# pairwise Wilcoxon location-shift labels collapsed into one "dif" cell.
#
# Arguments:
#   data        data frame with a GROUP column and numeric measurement columns.
#   id_groups   vector of GROUP values to report (character or numeric).
#   combination non-empty list of length-2 vectors naming the column pairs to
#               compare, given as column positions in `data` or column names.
#
# Returns a data frame of character columns with one row per group: GROUP, one
# column per distinct column referenced in `combination` (holding the group
# median), and "dif". The "dif" cell joins one "<i>-<j>: <value>" label per
# pair with ", ", where <value> is the negated, 2-decimal-rounded `estimate`
# returned by wilcox.test(x_i, x_j, conf.int = TRUE). Rows are named after the
# group ids.
#
# Stops with an error when `combination` is empty or a group has no rows.
wilcox.fun <- function(data, id_groups, combination) {
  stopifnot(is.list(combination), length(combination) > 0)

  cols <- unique(unlist(combination))
  # Output names are derived once from `data`, not from a loop-local variable.
  out_names <- c("GROUP", names(data[cols]), "dif")

  result_list <- list()
  for (g in id_groups) {
    # drop = FALSE keeps a matrix even when the group has a single row.
    grp <- as.matrix(data[data$GROUP %in% g, , drop = FALSE])
    if (nrow(grp) == 0L) {
      stop("no rows found for GROUP ", g, call. = FALSE)
    }

    med <- paste(apply(grp[, cols, drop = FALSE], 2, median))

    # Index the full group matrix so that pair positions always refer to the
    # columns of `data`, even when `combination` does not start at column 1.
    dif <- vapply(
      combination,
      function(pair) {
        i <- pair[1]
        j <- pair[2]
        test <- wilcox.test(grp[, i], grp[, j], conf.int = TRUE)
        paste0(i, "-", j, ": ", -round(test$estimate, 2))
      },
      character(1),
      USE.NAMES = FALSE
    )

    # Build the single summary row directly instead of growing a data frame
    # row by row and discarding the extra rows afterwards.
    row <- data.frame(
      matrix(c(g, med, paste(dif, collapse = ", ")), nrow = 1),
      stringsAsFactors = FALSE
    )
    names(row) <- out_names
    result_list[[as.character(g)]] <- row
  }

  if (length(result_list) == 0L) {
    empty <- data.frame(
      matrix(character(0), nrow = 0, ncol = length(out_names)),
      stringsAsFactors = FALSE
    )
    names(empty) <- out_names
    return(empty)
  }
  do.call(rbind, result_list)
}
输出
> wilcox.fun(data, c("1", "2"),combination)
GROUP col1 col2 col3 col4 col5 col6 dif
1 1 139.5 379 864 544.5 665 470.5 1-2: 239.5, 3-4: -319.5, 5-6: -194.5
2 2 759.5 723 901.5 674 255.5 866.5 1-2: -36.5, 3-4: -227.5, 5-6: 611
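在这份数据中,`dif` 列恰好等于两列中位数之差(后一列减前一列),所以这里不一定需要定义复杂的 `wilcox.fun`。需要注意:`wilcox.test()` 返回的 `estimate` 实际上是 Hodges–Lehmann 位置差估计量,只是因为本例每组只有两行,它才与中位数之差相同;每组行数更多时两者可能不一致。如果只需要中位数之差,可以用下面的方式更快地得到 `diff`: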
library(dplyr)

# Per-group difference of medians for each column pair (second minus first),
# formatted as a single label per group.
by_group <- group_by(data, GROUP)
summarise(
  by_group,
  diff = paste0(
    "1-2: ", median(col2) - median(col1),
    ", 3-4: ", median(col4) - median(col3),
    ", 5-6: ", median(col6) - median(col5)
  )
)
## A tibble: 3 × 2
#  GROUP diff
#  <int> <chr>
#1     1 1-2: 239.5, 3-4: -319.5, 5-6: -194.5
#2     2 1-2: -36.5, 3-4: -227.5, 5-6: 611
#3     3 1-2: -195, 3-4: -288, 5-6: 52