创建一个函数以在R中运行条件求和



我有一个这样的数据帧:

dat<- data.frame (
'Ones'=c(0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0), 
'Thats'=c(0,5,3,6,8,4,5,6,8,3,1,3,4,5,6,7,4,3,4,5))

我必须创建一个函数(gap1(,它在1中检测每个1,然后在Thats中求和n-1、n和n+1,其中n与1在同一行。

例如,在这个数据集中,我有两个1。

dat<- data.frame (
'Ones'=c(0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0), 
'Thats'=c(0,5,3,6,8,4,5,6,8,3,1,3,4,5,6,7,4,3,4,5))
dat

这应该是输出:

Ones  Thats  gap1
1      4     17   #(8+4+5)
1      1      7   #(3+1+3)

我想随意扩大这个差距,例如:

Ones  Thats      gap1       gap2            gap3 ...
1      4         17         29  #(6+8+4+5+6)
1      1         7           9  #(8+3+1+3+4)

还有一个问题我必须考虑:假设我们有这个数据帧:

dat<- data.frame (
'Ones'=c(1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0), 
'Thats'=c(0,5,3,6,8,4,5,6,8,3,1,NA,4,5,6,7,4,3,4,5))

如果开头(或结尾(有1,或者有NA,则函数应使用可用数据。

在这种情况下,例如:

Ones  Thats        gap1          gap2
1         0         5 (0+5)      8     #(0+5+3)
1         4         17 (8+4+5)   29    #(6+8+4+5+6)
1         1         4 (3+1+NA)   16    #(8+3+1+NA+4)


你有什么建议吗?

使用tidyverse/collapse

对于任意数量的超前和滞后,collapse包提供了一个很好的函数flag,它有更多的参数来指定列(cols(或分组变量g

library(dplyr)
f <- function(df, n){
df %>%
collapse::flag(-n:n) %>%
transmute(Ones, Thats, gap = rowSums(., na.rm = T) - 1) %>%
filter(Ones == 1)
}
x <- data.frame (
'Ones'=c(1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0), 
'Thats'=c(0,5,3,6,8,4,5,6,8,3,1,NA,4,5,6,7,4,3,4,5))
# we can now specify how many lags to count:
f(x, 1) 
Ones Thats gap
1    1     0   5
2    1     4  17
3    1     1   4
f(x, 2)
Ones Thats gap
1    1     0   8
2    1     4  29
3    1     1  16

或者,如果你想指定要计算的间隙数量,我们可以将函数简化为

f <- function(df, n){
df %>%
collapse::flag(-n:n) %>%
rowSums(na.rm = T) - 1
}
x %>%
mutate(gap1 = f(., 1),
gap2 = f(., 2)) %>%
filter(Ones == 1)
Ones Thats gap1 gap2
1    1     0    5    8
2    1     4   17   29
3    1     1    4   16

基本R如果你喜欢简洁的函数:

f <- Vectorize((df, n) rowSums(collapse::flag(df, -n:n), na.rm = T) - 1, "n")
x[paste0("gap", 1:2)] <- f(x, 1:2) ; subset(x, Ones == 1)
Ones Thats gap1 gap2
1     1     0    5    8
6     1     4   17   29
11    1     1    4   16

使用BaseR

myfun <- function(data,gap=1) {
points <- which(data["Ones"]==1)
sapply(points, function(x) {
bottom <- ifelse(x-gap<=0,1,x -gap) 
top <- ifelse(x+ gap > nrow(data),nrow(data),x +gap)
sum(data[bottom:top,"Thats"], na.rm=T)
})
}
#> myfun(dat,1)
#[1]  5 17  4
#> myfun(dat,2)
#[1]  8 29 16

另一个基本R解决方案

f <- function(dat, width = 1)
{
dat$gaps <- sapply(seq(nrow(dat)), function(x) {
if(dat$Ones[x] == 0) return(0)
i <- x + seq(2 * width + 1) - (width + 1)
i <- i[i > 0]
i <- i[i < nrow(dat)]
sum(dat$Thats[i])
})
dat[dat$Ones == 1,]
}
f(dat, 1)
#>    Ones Thats gaps
#> 6     1     4   17
#> 11    1     1    7
f(dat, 2)
#>    Ones Thats gaps
#> 6     1     4   29
#> 11    1     1   19

最新更新