r-希望将前一个日期行字段增加另一列中的值,并且滞后不起作用



我有一个简单的表(包含在下面),我想在其中创建第三列,称之为st_date,其中第一行的值将是一个固定值(比如2020年1月30日)。

对于后面的每一行,我希望st_date中的值是前一行的日期值(即滞后一行的值)加上以工作日(不含周末)为单位的lengths值

例如:在第2行,值应为2020年1月30日+7个工作日=2020年2月10日;第3行的值应为2020年2月10日+10个工作日=2020年2月25日

生成原始表格的代码是

# Run-length encode t_1$BB_W_D into a data frame (columns `lengths` and
# `values`), then add an st_date column holding the desired start date on
# every row.
tmp <- mutate(
  as.data.frame(unclass(rle(t_1$BB_W_D))),
  st_date = df_start_date
)

===>df_start_date是我想在第1行中的开始日期

structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L, 
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L, 
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L, 
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 
18291, 18291, 18291, 18291), class = "Date")), class = "data.frame", row.names = c(NA, 
-113L))

当我运行下一个代码集时,

# Shift st_date down by one row and add `lengths` to it. lag() reads the
# column as it was before this call (every row holds the same start date),
# so the additions do not accumulate from row to row.
tmp <- mutate(tmp, st_date = lag(st_date, n = 1) + lengths)

它创建了以下内容,其中它不保留第1行中的值,并且后面的每一行现在只是从2020年1月30日的原始值开始递增。

不确定问题出在哪里,因为我以前使用过lag(滞后)函数,而且之前从未出现过这种行为

structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L, 
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L, 
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L, 
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(NA, 
18298, 18301, 18297, 18293, 18292, 18293, 18295, 18293, 18295, 
18300, 18298, 18296, 18294, 18296, 18299, 18296, 18301, 18301, 
18294, 18292, 18293, 18297, 18293, 18292, 18293, 18292, 18293, 
18292, 18294, 18292, 18295, 18294, 18304, 18301, 18296, 18292, 
18301, 18292, 18297, 18293, 18294, 18292, 18292, 18294, 18294, 
18293, 18293, 18293, 18293, 18293, 18299, 18303, 18293, 18292, 
18294, 18297, 18299, 18301, 18297, 18293, 18292, 18293, 18295, 
18293, 18295, 18300, 18298, 18296, 18294, 18296, 18299, 18296, 
18301, 18301, 18294, 18292, 18293, 18297, 18293, 18292, 18293, 
18292, 18293, 18292, 18294, 18292, 18295, 18294, 18304, 18301, 
18296, 18292, 18301, 18292, 18297, 18293, 18294, 18292, 18292, 
18294, 18294, 18293, 18293, 18293, 18293, 18293, 18299, 18303, 
18293, 18292, 18294, 18297), class = "Date")), class = "data.frame", row.names = c(NA, 
-113L))

下面是一个使用简单for循环的示例。此代码

library(bizdays)

# Business-day calendar that treats Saturdays and Sundays as non-working days.
create.calendar(name = "my_cal", weekdays = c("saturday", "sunday"))

# Each row's st_date is built from the already-updated date of the row above
# it, so the column is filled sequentially. Row 1 keeps its starting value.
# seq_len(nrow(df))[-1] is empty for a data frame with fewer than two rows,
# whereas 2:nrow(df) would count down (2, 1) and index past the end.
for (idx in seq_len(nrow(df))[-1]) {
  days.to.add <- df$lengths[idx]
  previous.date <- df$st_date[idx - 1]

  # Move forward by `days.to.add` business days according to "my_cal".
  new.date <- offset(previous.date, days.to.add, "my_cal")

  df$st_date[idx] <- new.date
}
head(df)

为您提供输出。

lengths values    st_date
1       1   <NA> 2020-01-30
2       7     Up 2020-02-10
3      10   Down 2020-02-24
4       6     Up 2020-03-03
5       2   Down 2020-03-05
6       1     Up 2020-03-06

这是结果数据帧。

structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L, 
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L, 
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L, 
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(18291, 
18302, 18316, 18324, 18326, 18327, 18331, 18337, 18339, 18345, 
18358, 18367, 18374, 18379, 18386, 18396, 18403, 18417, 18431, 
18436, 18437, 18439, 18449, 18451, 18452, 18456, 18457, 18459, 
18460, 18465, 18466, 18472, 18477, 18494, 18508, 18515, 18516, 
18530, 18533, 18541, 18543, 18548, 18549, 18550, 18555, 18558, 
18562, 18564, 18568, 18570, 18572, 18584, 18600, 18604, 18605, 
18610, 18618, 18628, 18642, 18652, 18654, 18655, 18659, 18663, 
18667, 18673, 18684, 18695, 18702, 18705, 18712, 18724, 18731, 
18745, 18759, 18764, 18765, 18767, 18775, 18779, 18780, 18782, 
18785, 18787, 18788, 18793, 18794, 18800, 18803, 18822, 18836, 
18843, 18844, 18858, 18859, 18869, 18871, 18876, 18877, 18878, 
18883, 18886, 18890, 18892, 18894, 18898, 18900, 18912, 18928, 
18932, 18933, 18936, 18946), class = "Date")), row.names = c(NA, 
-113L), class = "data.frame")

HTH-

这里有一个使用purrr::accumulate的方法。

就像@Alexlok一样,我受到了之前这个答案的启发(谢谢Rich!)。

accumulate将一个带有两个参数(.x和.y)的函数依次应用于向量:.x是上一步的结果,.y是向量中的下一个值。可以使用.init =定义初始值。

因此在第一次迭代中,.x为as.Date("2020-01-30") - 1,.y为1(即lengths的第一个值)。

接下来,我们创建一个日期序列,从开始日期延伸到未来足够远的位置,以免因假期过多而不够用。然后,我们使用timeDate::isBizday函数来确定这些日期中哪些是工作日,并取其中的第.y个工作日(.y即所需的工作日数)。

然后,我们返回该日期,作为下一轮的.x。对整个lengths向量重复此过程,直到处理完毕。

accumulate返回.init作为第一个值,因此我们可以使用[-1]。它还返回与.x相同的类型,因此我们需要转换回具有as.Date的日期。

library(dplyr)
library(purrr)
library(timeDate)

# Walk down `lengths`, carrying the previous row's date forward: at each step
# .x is the date produced for the row above and .y is the number of business
# days to advance. .init is the day before the start date, and the first
# accumulated value (.init itself) is dropped with [-1].
as_tibble(tmp) %>%
  mutate(
    st_date = as.Date(
      accumulate(
        lengths,
        ~ {
          # Candidate calendar days after .x. Plain Date + integer arithmetic
          # is used because days() comes from lubridate, which is not
          # attached here. The window 3 + 2 * .y is meant to be long enough
          # to contain .y business days.
          dates <- .x + seq_len(3 + 2 * .y)
          bizdates <- dates[isBizday(as.timeDate(dates))]
          # The .y-th business day after .x becomes the next row's date.
          bizdates[.y]
        },
        .init = as.Date("2020-01-30") - 1
      )[-1]
    )
  )
## A tibble: 113 x 3
#   lengths values st_date   
#     <int> <fct>  <date>    
# 1       1 NA     2020-01-30
# 2       7 Up     2020-02-10
# 3      10 Down   2020-02-24
# 4       6 Up     2020-03-03
# 5       2 Down   2020-03-05

相关内容

  • 没有找到相关文章