我有以下数据:
avector <- c(1,2,3,4,5)
num1 <- 1
num2 <- 2
num3 <- 3
num1%in%avector & num2%in%avector
# TRUE
我想写一行类似的代码,仅当这些数字在 avector 中连续(相邻)出现时才返回 TRUE。
所需输出:
num1%in%avector & num2%in%avector
# TRUE
# Code similar to this:
num1%in%vector & num3%in%vector
# FALSE
编辑:以下内容并未包含在原问题中,但由于收到了很多回答,特此补充。
我之所以寻找类似 num1 %in% avector & num3 %in% avector 的解决方案,
是因为我希望用它来过滤数据(见下面的 bonus_dat):
bonus_dat %>%
filter(lower %in% strata[[1]] & upper %in% strata[[1]])
我试着应用Benson的解决方案,但没有成功:
# NOTE(review): this computes position(lower) - position(upper), so an adjacent
# pair with `lower` first yields -1, never 1; there is no abs() around the
# difference. Also no rowwise() is applied, so `lower`/`upper` are presumably
# whole (per-group) columns here rather than single values -- confirm.
bonus_dat %>%
filter((lower %in% strata[[1]] & upper %in% strata[[1]] & (( which(strata[[1]] == lower) - which(strata[[1]] == upper)) == 1) ))
因此,可以用来过滤行的解决方案符合我的偏好。
数据:
library(dplyr)
bonus_dat <- structure(list(strata = list(c(0, 25, 100, 500, 1000, 1e+06),
c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500, 1000,
1e+06), c(0, 25, 100, 500, 1000, 1e+06), c(0, 25, 100, 500,
1000, 1e+06), c(0, 25, 100, 500, 1000, 1e+06)), lower = c(0L,
25L, 100L, 500L, 500L, 1000L), upper = c(25L, 100L, 500L, 1000L,
1000000L, 1000000L), value = c(1,3,4,6,2,1)), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
upper = c(25L, 100L, 500L, 1000L, 1000000L), .rows = structure(list(
1L, 2L, 3L, 4L, 5:6), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
在 base R 中,您可以用 paste 把这些值拼接成字符串,然后检查该字符串是否存在:
# Wrap both the pattern and the collapsed vector in the separator so that only
# whole elements can match (otherwise "1,2" is also found inside "11,2,3").
# fixed = TRUE keeps "." in decimal values from acting as a regex wildcard.
grepl(paste0(",", num1, ",", num2, ","),
      paste0(",", paste(avector, collapse = ","), ","), fixed = TRUE)
grepl(paste0(",", num1, ",", num3, ","),
      paste0(",", paste(avector, collapse = ","), ","), fixed = TRUE)
我们可以用另一个表达式来检查位置差是否等于1,如果等于1,它们是连续的
# match() gives NA when a value is absent, whereas which(avector == x) gives a
# zero-length vector that would turn the whole expression into logical(0).
# With NA on the right-hand side, FALSE & NA is still a single FALSE.
(num1 %in% avector & num2 %in% avector) &
  (abs(match(num2, avector) - match(num1, avector)) == 1)
# [1] TRUE
(num1 %in% avector & num3 %in% avector) &
  (abs(match(num3, avector) - match(num1, avector)) == 1)
# [1] FALSE
您可以添加一个额外的AND语句:abs(diff(match(c(x1, x2), vec))) == 1L
#' Test whether two values occupy neighbouring positions in a vector
#'
#' @param x1,x2 Values to look up in `vec`.
#' @param vec Vector to search; positions come from `match()`, i.e. the first
#'   occurrence of each value.
#' @return `TRUE` when both values are present and their positions differ by
#'   exactly one (in either order), otherwise `FALSE`.
is.consec <- function(x1, x2, vec) {
  positions <- match(c(x1, x2), vec)
  gap <- abs(diff(positions))
  both_found <- x1 %in% vec & x2 %in% vec
  # If a value is missing, `gap` is NA, but FALSE & NA is FALSE.
  both_found & gap == 1L
}
is.consec(num1, num2, avector)
# [1] TRUE
is.consec(num1, num3, avector)
# [1] FALSE
定义自定义函数is.consec()
不是必需的,但它使代码更整洁。
下面的测试返回FALSE
,因为6不包含在avector
中。
is.consec(5, 6, avector)
# [1] FALSE
这个方法可以很容易地用来过滤数据:
bonus_dat %>%
rowwise() %>%
filter(is.consec(lower, upper, strata)) %>%
ungroup()
# # A tibble: 5 × 4
# strata lower upper value
# <list> <int> <int> <dbl>
# 1 <dbl [6]> 0 25 1
# 2 <dbl [6]> 25 100 3
# 3 <dbl [6]> 100 500 4
# 4 <dbl [6]> 500 1000 6
# 5 <dbl [6]> 1000 1000000 1
如何将它们粘贴在一起并使用str_detect
?这样做的优点是很容易扩展到任意数量的数字。
library(stringr)
# Join with a separator and wrap both sides in it: with collapse = "" the
# vectors c(1, 23) and c(12, 3) would both become "123" and match each other.
# fixed() makes the pattern a literal string rather than a regular expression.
str_detect(
  paste0(",", paste(avector, collapse = ","), ","),
  fixed(paste0(",", paste(c(num1, num2, num3), collapse = ","), ","))
)
# [1] TRUE
#' Does `x` contain num1, num2, num3 as three adjacent elements, in that order?
#'
#' @param x Vector to scan.
#' @return `TRUE` at the first matching window, otherwise `FALSE`.
#' @details `num1`, `num2` and `num3` are not arguments; they are looked up in
#'   the enclosing environment when the function is called.
foo <- function(x) {
  # seq_len() gives an empty sequence when x has fewer than three elements;
  # 1L:(length(x) - 2L) would count downwards and index past the end of x.
  for (i in seq_len(max(length(x) - 2L, 0L))) {
    if (x[i] == num1 && x[i + 1L] == num2 && x[i + 2L] == num3) return(TRUE)
  }
  # No window matched: return an explicit FALSE instead of falling through
  # with NULL.
  FALSE
}
foo(avector)
# [1] TRUE
avector <- c(1,2,3,4,5)
num1 <- 1
num2 <- 2
num3 <- 3
diff(which(avector %in% c(num1, num2))) == 1
# TRUE
diff(which(avector %in% c(num1, num3))) == 1
# FALSE
或带有错误处理
avector <- c(1, 2, 3, 4, 5)
num1 <- 1
num2 <- 2
num3 <- 3
num4 <- 6
#' Check whether the values in `nums` sit next to each other in `vec`
#'
#' @param vec Vector to search.
#' @param nums Values to look for; every one of them must occur in `vec`.
#' @return The gaps between the matched positions compared with 1, i.e. a
#'   single `TRUE`/`FALSE` when `nums` holds two distinct values.
#' @details Signals an error if any element of `nums` is missing from `vec`.
fun <- function(vec, nums) {
  # Validate against the `vec` argument rather than a global vector.
  if (!all(nums %in% vec)) stop("not all numbers are present in the vector")
  diff(which(vec %in% nums)) == 1
}
fun(avector, c(num1, num2))
# [1] TRUE
fun(avector, c(num1, num3))
# [1] FALSE
fun(avector, c(num1, num4))
# Error in fun(avector, c(num1, num4)) :
# not all numbers are present in the vector
bonus_dat %>% rowwise() %>% filter(match(upper, strata) - match(lower, strata) == 1L)
似乎奏效了。
library(dplyr)
# Example data: seven lower/upper pairs with a value each; the last pair
# (400, 500) has a lower bound that does not occur in the strata vector.
bonus_dat <- data.frame(
  lower = c(0L, 25L, 100L, 500L, 500L, 1000L, 400L),
  upper = c(25L, 100L, 500L, 1000L, 1000000L, 1000000L, 500L),
  value = c(1, 3, 4, 6, 2, 1, 0)
)
# Every row carries the same strata vector as a list-column element.
bonus_dat$strata <- rep(
  list(c(0, 25, 100, 500, 1000, 1e+06)),
  nrow(bonus_dat)
)
# Move the list column to the front.
bonus_dat <- bonus_dat[, c("strata", "lower", "upper", "value")]
bonus_dat %>% rowwise() %>% filter(match(upper, strata) - match(lower, strata) == 1L)
#> strata lower upper value
#> 1 0, 25, 100, 500, 1000, 1000000 0 25 1
#> 2 0, 25, 100, 500, 1000, 1000000 25 100 3
#> 3 0, 25, 100, 500, 1000, 1000000 100 500 4
#> 4 0, 25, 100, 500, 1000, 1000000 500 1000 6
#> 5 0, 25, 100, 500, 1000, 1000000 1000 1000000 1