这里有一个说明性的数据集:
# Build a small reproducible example: three operators and four value columns.
# Each column draws a pool of uniform numbers scaled to [0, 10] and then
# samples three of them without replacement.
# The seed fixes the RNG state; the exact draws still depend on the R
# version's default sampling algorithm (it changed in R 3.6.0).
set.seed(1)
sam_dat <- data.frame(
  Operator = seq_len(3),  # same integers as seq(1:3), without the redundant seq()
  t1 = sample(runif(10) * 10, 3),
  t2 = sample(runif(3) * 10, 3),
  t3 = sample(runif(12) * 10, 3),
  t4 = sample(runif(34) * 10, 3)
)
计算下列各项(使用tidyverse、base R或其他方法)的最简单方法是什么?
t1_t2 = t1 + t2
t1_t3 = t1 + t2 + t3
t1_t4 = t1 + t2 + t3 + t4
编辑:有没有一种方法可以做到这一点,而不必显式地写出每个计算?虽然对于示例数据集中的四列来说手写是可行的,但我的实际数据的列数要多得多。
通过手动计算,操作员1的结果为:
10.71, 19.40, 20.48
提前感谢!
在base R
中,我们可以使用transform
# Append the three running totals as new columns. transform() evaluates each
# expression against the columns of sam_dat and returns a modified copy;
# sam_dat itself is not reassigned here.
transform(
  sam_dat,
  t1_t2 = t1 + t2,
  t1_t3 = t1 + t2 + t3,
  t1_t4 = t1 + t2 + t3 + t4
)
# Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
#1 1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
#2 2 2.655087 7.176185 9.347052 7.829328 9.831272 19.17832 27.00765
#3 3 2.016819 7.698414 1.255551 4.068302 9.715234 10.97078 15.03909
或者另一个选项是将数据集的各个列子集放入一个list
,然后使用rowSums
# One rowSums() per growing set of columns; sapply() binds the three resulting
# vectors into a matrix whose columns become the new variables.
sam_dat[c("t1_t2", "t1_t3", "t1_t4")] <- sapply(
  list(
    c("t1", "t2"),
    c("t1", "t2", "t3"),
    c("t1", "t2", "t3", "t4")
  ),
  function(cols) rowSums(sam_dat[cols])
)
或者通过索引使其更加紧凑
# Names for the running totals: the first value column paired with each later
# one (t1_t2, t1_t3, ...), derived from the columns actually present.
# Assumes sam_dat holds only Operator followed by the value columns, i.e. none
# of the other snippets has already appended result columns.
nm1 <- paste(names(sam_dat)[2], names(sam_dat)[-(1:2)], sep = "_")
# For every later column i, sum columns 2..i row by row. The index range
# follows nm1 instead of a hard-coded 3:5, so the code scales with the number
# of value columns; vapply() pins the result to one numeric value per row.
sam_dat[nm1] <- vapply(
  seq_along(nm1) + 2L,
  function(i) rowSums(sam_dat[2:i]),
  numeric(nrow(sam_dat))
)
或者另一个选项是matrixStats
中的rowCumsums
library(matrixStats)
# Row-wise cumulative sums over every column except Operator. The first
# cumulative column is dropped, leaving one column per name in nm1.
sam_dat[nm1] <- local({
  running <- rowCumsums(as.matrix(sam_dat[-1]))
  running[, -1]
})
或者类似于purrr
的accumulate
的base R
操作是Reduce
# Reduce() with accumulate = TRUE keeps every partial column sum; the first
# element (t1 alone) is dropped and the remaining vectors are bound as columns.
sam_dat[nm1] <- local({
  partial_sums <- Reduce(`+`, sam_dat[-1], accumulate = TRUE)
  do.call(cbind, partial_sums[-1])
})
或来自dplyr
的mutate
library(dplyr)
# mutate() evaluates its arguments in order, so each total can build on the
# previous one; the additions happen in the same left-to-right order as
# t1 + t2 + t3 + t4.
sam_dat %>%
  mutate(
    t1_t2 = t1 + t2,
    t1_t3 = t1_t2 + t3,
    t1_t4 = t1_t3 + t4
  )
或者tidyverse
中的另一个选项是转换为"长"格式,进行计算,然后再转换回"宽"格式:
library(tidyr)
library(stringr)
# Reshape to one row per (Operator, column), accumulate within each operator,
# then spread the running totals back out into columns and attach them to the
# original data.
sam_dat %>%
  pivot_longer(cols = -Operator) %>%
  group_by(Operator) %>%
  mutate(value = cumsum(value)) %>%
  slice(-1) %>% # drop each group's first row (the total after t1 alone)
  ungroup() %>%
  mutate(name = str_c("t1_", name)) %>%
  pivot_wider(names_from = name, values_from = value) %>%
  select(-Operator) %>%
  bind_cols(sam_dat, .)
# A tibble: 3 x 8
# Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 5.73 4.98 8.70 1.08 10.7 19.4 20.5
#2 2 2.66 7.18 9.35 7.83 9.83 19.2 27.0
#3 3 2.02 7.70 1.26 4.07 9.72 11.0 15.0
另一个选项是在dplyr 中使用rowwise
函数
# For each row of the value columns (everything except Operator), build a
# one-row data frame of cumulative sums named t1_<column>, drop its first
# column (the running total after t1 alone), and bind the results onto the
# original data with cbind().
# NOTE(review): do() is superseded in current dplyr — confirm the installed
# version before relying on this form.
sam_dat[-1]%>%
rowwise()%>%
do(setNames(data.frame(t(cumsum(unlist(.)))),sprintf('t1_%s',names(.)))[-1])%>%
cbind(sam_dat,.)
Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
1 1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2 2 2.655087 7.176185 9.347052 7.829328 9.831272 19.17832 27.00765
3 3 2.016819 7.698414 1.255551 4.068302 9.715234 10.97078 15.03909
基本操作是:
# Core step on its own: one row of cumulative sums per input row, before any
# renaming or dropping of the first column.
sam_dat[-1]%>%
rowwise()%>%
do(data.frame(t(cumsum(unlist(.)))))
以下是使用apply
+cumsum
的base R解决方案,即
# Row-wise cumulative sums over the value columns. apply() returns the result
# transposed, so t() restores one row per operator; the first column (t1
# alone) is dropped and the rest are named t1_<column> before being appended.
sam_dat <- local({
  running <- t(apply(sam_dat[-1], 1, cumsum))[, -1]
  colnames(running) <- paste0("t1_", names(sam_dat)[-c(1, 2)])
  cbind(sam_dat, running)
})
使得
> sam_dat
Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
1 1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2 2 2.655087 7.176185 9.347052 7.829328 9.831272 19.17832 27.00765
3 3 2.016819 7.698414 1.255551 4.068302 9.715234 10.97078 15.03909
一个dplyr
和purrr
选项可以是:
# accumulate(..., c) yields the growing column sets (t1), (t1, t2), ...; the
# first is dropped. Each remaining set adds one row-sum column to a copy of
# sam_dat, named by joining all column names in the set, and the copies are
# merged back together with full_join().
map(
  accumulate(names(sam_dat[-1]), c)[-1],
  function(cols) {
    mutate(
      sam_dat,
      !!paste(cols, collapse = "_") := rowSums(select(sam_dat, one_of(cols)))
    )
  }
) %>%
  reduce(full_join)
Operator t1 t2 t3 t4 t1_t2 t1_t2_t3 t1_t2_t3_t4
1 1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943 18.78183
2 2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186 18.68252
3 3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199 27.46918
或者进一步匹配您想要的输出:
# Same approach as joining all names, except that each new column is named
# from only the first and last column of its set (t1_t2, t1_t3, t1_t4).
map(
  accumulate(names(sam_dat[-1]), c)[-1],
  function(cols) {
    mutate(
      sam_dat,
      !!paste(head(cols, 1), tail(cols, 1), sep = "_") :=
        rowSums(select(sam_dat, one_of(cols)))
    )
  }
) %>%
  reduce(full_join)
Operator t1 t2 t3 t4 t1_t2 t1_t3 t1_t4
1 1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943 18.78183
2 2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186 18.68252
3 3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199 27.46918