R:如何在不同窗口大小的多列之间求和

  • 本文关键字:之间 求和 窗口大小 r
  • 更新时间 :
  • 英文 :


这里有一个说明性的数据集:

# Reproducible example data: three operators, each with four numeric
# measurement columns t1..t4.
set.seed(1)  # fix the RNG so the sampled values are repeatable
sam_dat <- data.frame(
  # seq_len(3) gives the same integer ids 1, 2, 3 as seq(1:3) without
  # wrapping seq() around an already-built sequence.
  Operator = seq_len(3),
  # Each column samples 3 values from a pool of uniform draws scaled by 10.
  t1 = sample(runif(10) * 10, 3),
  t2 = sample(runif(3) * 10, 3),
  t3 = sample(runif(12) * 10, 3),
  t4 = sample(runif(34) * 10, 3)
)

计算下列各项(使用tidyverse、base R或其他方法)的最简单方法是什么?

t1_t2 = t1 + t2
t1_t3 = t1 + t2 + t3
t1_t4 = t1 + t2 + t3 + t4

编辑:有没有一种方法可以做到这一点,而不必显式地写出每个计算?虽然这对于我的示例数据集中的四列是可行的,但我的实际数据的列数要多得多。

通过手动计算,操作员1的结果为:

10.71,  19.40,  20.48

提前感谢!

在base R中,我们可以使用transform

transform(sam_dat, t1_t2 = t1 + t2, 
          t1_t3 = t1 + t2 + t3, t1_t4 = t1 + t2 + t3 + t4)
#  Operator       t1       t2       t3       t4     t1_t2    t1_t3    t1_t4
#1        1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
#2        2 2.655087 7.176185 9.347052 7.829328  9.831272 19.17832 27.00765
#3        3 2.016819 7.698414 1.255551 4.068302  9.715234 10.97078 15.03909

或者另一个选项是将数据集的各个列子集放入一个list,然后对每个子集使用rowSums

# Build one column subset per cumulative window (t1:t2, t1:t3, t1:t4) and
# take the row sums of each. vapply() is used instead of sapply() so the
# result is always a numeric matrix with one row per row of sam_dat, rather
# than a shape that depends on the input.
sam_dat[c("t1_t2", "t1_t3", "t1_t4")] <- vapply(
  list(
    sam_dat[c("t1", "t2")],
    sam_dat[c("t1", "t2", "t3")],
    sam_dat[c("t1", "t2", "t3", "t4")]
  ),
  rowSums,
  FUN.VALUE = numeric(nrow(sam_dat))
)

或者通过索引使其更加紧凑

# Name each new column "<first>_<last>": the name of column 2 joined to the
# names of columns 3 to 5.
nm1 <- paste(names(sam_dat)[2], names(sam_dat)[3:5], sep = "_")
# For each end column i, sum columns 2..i row by row. vapply() pins the
# result to one numeric value per row of sam_dat, unlike sapply(), whose
# return type depends on the input.
sam_dat[nm1] <- vapply(
  3:5,
  function(i) rowSums(sam_dat[2:i]),
  FUN.VALUE = numeric(nrow(sam_dat))
)

或者另一个选项是matrixStats 中的rowCumsums

# Cumulative sums along each row with matrixStats::rowCumsums().
# The data frame minus its first column is converted to a matrix first;
# [,-1] then drops the first cumulative column before the rest is stored
# under the names in nm1.
library(matrixStats)
sam_dat[nm1] <- rowCumsums(as.matrix(sam_dat[-1]))[,-1]

或者,与purrr中的accumulate类似的base R操作是Reduce

# Reduce() with accumulate = TRUE returns a list of running element-wise
# sums over the columns of sam_dat[-1]; [-1] drops the first list element
# and do.call(cbind, ...) binds the remaining ones into columns.
sam_dat[nm1] <- do.call(cbind, Reduce(`+`, sam_dat[-1], accumulate = TRUE)[-1])

或者使用dplyr中的mutate

library(dplyr)
sam_dat %>%
  mutate(t1_t2 = t1 + t2, 
         t1_t3 = t1 + t2 + t3,
         t1_t4 = t1 + t2 + t3 + t4)

或者tidyverse中的另一个选项是转换为"长"格式,进行计算,然后再转换回"宽"格式:

 # Reshape to long form, take a running sum per Operator, then reshape back
 # to wide form and attach the new columns to the original data.
 library(tidyr)
 library(stringr)
 sam_dat %>%
     # stack every column except Operator into name/value pairs
     pivot_longer(cols = -Operator) %>%
     group_by(Operator) %>%
     # running total of the values within each Operator
     mutate(value = cumsum(value)) %>% 
     # drop the first row of each group
     slice(-1) %>%
     ungroup %>% 
     # prefix each remaining name with 't1_'
     mutate(name = str_c('t1_', name)) %>%
    pivot_wider(names_from = name, values_from = value) %>% 
    # remove Operator before binding the new columns onto sam_dat
    select(-Operator) %>% 
    bind_cols(sam_dat, .)
# A tibble: 3 x 8
#  Operator    t1    t2    t3    t4 t1_t2 t1_t3 t1_t4
#     <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1        1  5.73  4.98  8.70  1.08 10.7   19.4  20.5
#2        2  2.66  7.18  9.35  7.83  9.83  19.2  27.0
#3        3  2.02  7.70  1.26  4.07  9.72  11.0  15.0

另一个选项是在dplyr 中使用rowwise函数

sam_dat[-1]%>%
    rowwise()%>%
    do(setNames(data.frame(t(cumsum(unlist(.)))),sprintf('t1_%s',names(.)))[-1])%>%
    cbind(sam_dat,.)
  Operator       t1       t2       t3       t4     t1_t2    t1_t3    t1_t4
1        1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2        2 2.655087 7.176185 9.347052 7.829328  9.831272 19.17832 27.00765
3        3 2.016819 7.698414 1.255551 4.068302  9.715234 10.97078 15.03909

基本操作是:

sam_dat[-1]%>%
    rowwise()%>%
     do(data.frame(t(cumsum(unlist(.)))))

以下是使用apply+cumsum的base R解决方案,即

# apply(..., 1, cumsum) takes the cumulative sum along each row of
# sam_dat[-1]; t() transposes that result and [,-1] drops its first column.
# `colnames<-` labels the remaining columns "t1_<name>", using the names of
# sam_dat without its first two, and cbind() appends them to sam_dat.
sam_dat <- cbind(sam_dat,
                 `colnames<-`(t(apply(sam_dat[-1], 1,cumsum))[,-1],
                              paste0("t1_",names(sam_dat)[-c(1,2)])))

使得

> sam_dat
  Operator       t1       t2       t3       t4     t1_t2    t1_t3    t1_t4
1        1 5.728534 4.976992 8.696908 1.079436 10.705526 19.40243 20.48187
2        2 2.655087 7.176185 9.347052 7.829328  9.831272 19.17832 27.00765
3        3 2.016819 7.698414 1.255551 4.068302  9.715234 10.97078 15.03909

一个使用dplyr和purrr的选项可以是:

map(.x = accumulate(names(sam_dat[-1]), c)[-1], 
    ~ sam_dat %>%
     mutate(!!paste(.x, collapse = "_") := rowSums(select(., one_of(.x))))) %>%
 reduce(full_join)
  Operator       t1       t2        t3       t4    t1_t2 t1_t2_t3 t1_t2_t3_t4
1        1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943    18.78183
2        2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186    18.68252
3        3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199    27.46918

或者进一步匹配您想要的输出:

map(.x = accumulate(names(sam_dat[-1]), c)[-1], 
    ~ sam_dat %>%
     mutate(!!paste(head(.x, 1), tail(.x, 1), sep = "_") := rowSums(select(., one_of(.x))))) %>%
 reduce(full_join)
  Operator       t1       t2        t3       t4    t1_t2    t1_t3    t1_t4
1        1 5.728534 4.976992 0.1339033 7.942399 10.70553 10.83943 18.78183
2        2 3.721239 7.698414 2.6722067 4.590657 11.41965 14.09186 18.68252
3        3 8.983897 3.841037 9.3470523 5.297196 12.82493 22.17199 27.46918

最新更新