我有一个简单的表(数据见下文),我想在其中创建第三列,称之为 st_date,其中第一行的值是一个固定值(比如 2020 年 1 月 30 日)。
对于后面的每一行,我希望 st_date 中的值等于前一行的 st_date(即滞后的日期值)加上 lengths 列的值,其中 lengths 以工作日(不含周末)为单位。
例如:在第 2 行,值应为 2020 年 1 月 30 日 + 7 个工作日 = 2020 年 2 月 10 日;在第 3 行,值应为 2020 年 2 月 10 日 + 10 个工作日 = 2020 年 2 月 25 日。
生成原始表格的代码是
# Run-length encode t_1$BB_W_D, turn the result into a data frame with one row
# per run (columns `lengths` and `values`), then add an st_date column filled
# with df_start_date.
tmp <- t_1$BB_W_D %>%
  rle() %>%
  unclass() %>%
  as.data.frame() %>%
  mutate(st_date = df_start_date)
===>df_start_date是我想在第1行中的开始日期
structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L,
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L,
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L,
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L,
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L,
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L,
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L,
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291, 18291,
18291, 18291, 18291, 18291), class = "Date")), class = "data.frame", row.names = c(NA,
-113L))
当我运行下一个代码集时,
# lag() works on the whole st_date column at once, so each row sees the
# ORIGINAL value of the row above (the same fixed start date everywhere), not
# the value just computed for that row. Row 1 has no previous row and becomes
# NA. `+ lengths` then adds plain calendar days to a Date.
tmp <- mutate(tmp, st_date = lag(st_date, n = 1) + lengths)
它创建了以下内容,其中它不保留第1行中的值,并且后面的每一行现在只是从2020年1月30日的原始值开始递增。
我不确定问题出在哪里,因为我以前用过 lag,从未出现过这种行为。
structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L,
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L,
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L,
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L,
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L,
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L,
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L,
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(NA,
18298, 18301, 18297, 18293, 18292, 18293, 18295, 18293, 18295,
18300, 18298, 18296, 18294, 18296, 18299, 18296, 18301, 18301,
18294, 18292, 18293, 18297, 18293, 18292, 18293, 18292, 18293,
18292, 18294, 18292, 18295, 18294, 18304, 18301, 18296, 18292,
18301, 18292, 18297, 18293, 18294, 18292, 18292, 18294, 18294,
18293, 18293, 18293, 18293, 18293, 18299, 18303, 18293, 18292,
18294, 18297, 18299, 18301, 18297, 18293, 18292, 18293, 18295,
18293, 18295, 18300, 18298, 18296, 18294, 18296, 18299, 18296,
18301, 18301, 18294, 18292, 18293, 18297, 18293, 18292, 18293,
18292, 18293, 18292, 18294, 18292, 18295, 18294, 18304, 18301,
18296, 18292, 18301, 18292, 18297, 18293, 18294, 18292, 18292,
18294, 18294, 18293, 18293, 18293, 18293, 18293, 18299, 18303,
18293, 18292, 18294, 18297), class = "Date")), class = "data.frame", row.names = c(NA,
-113L))
下面是一个使用简单 for 循环的示例。此代码
library(bizdays)

# Business-day calendar in which Saturdays and Sundays are the only
# non-working days (no holidays are registered).
create.calendar(name = "my_cal", weekdays = c("saturday", "sunday"))

# Walk the rows in order: each st_date is the previous row's st_date moved
# forward by this row's `lengths` business days. Row 1 keeps its start date.
# seq_len(nrow(df))[-1] is empty when df has fewer than two rows, whereas
# 2:nrow(df) would count backwards (2, 1) and index rows that do not exist.
for (idx in seq_len(nrow(df))[-1]) {
  days_to_add <- df$lengths[idx]
  previous_date <- df$st_date[idx - 1]
  # Namespaced so that stats::offset can never be picked up by mistake.
  df$st_date[idx] <- bizdays::offset(previous_date, days_to_add, "my_cal")
}

head(df)
会给出以下输出。
lengths values st_date
1 1 <NA> 2020-01-30
2 7 Up 2020-02-10
3 10 Down 2020-02-24
4 6 Up 2020-03-03
5 2 Down 2020-03-05
6 1 Up 2020-03-06
这是结果数据帧。
structure(list(lengths = c(1L, 7L, 10L, 6L, 2L, 1L, 2L, 4L, 2L,
4L, 9L, 7L, 5L, 3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L,
1L, 2L, 1L, 2L, 1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L,
6L, 2L, 3L, 1L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L,
1L, 3L, 6L, 8L, 10L, 6L, 2L, 1L, 2L, 4L, 2L, 4L, 9L, 7L, 5L,
3L, 5L, 8L, 5L, 10L, 10L, 3L, 1L, 2L, 6L, 2L, 1L, 2L, 1L, 2L,
1L, 3L, 1L, 4L, 3L, 13L, 10L, 5L, 1L, 10L, 1L, 6L, 2L, 3L, 1L,
1L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 8L, 12L, 2L, 1L, 3L, 6L), values = structure(c(NA,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L
), .Label = c("Down", "Up"), class = "factor"), st_date = structure(c(18291,
18302, 18316, 18324, 18326, 18327, 18331, 18337, 18339, 18345,
18358, 18367, 18374, 18379, 18386, 18396, 18403, 18417, 18431,
18436, 18437, 18439, 18449, 18451, 18452, 18456, 18457, 18459,
18460, 18465, 18466, 18472, 18477, 18494, 18508, 18515, 18516,
18530, 18533, 18541, 18543, 18548, 18549, 18550, 18555, 18558,
18562, 18564, 18568, 18570, 18572, 18584, 18600, 18604, 18605,
18610, 18618, 18628, 18642, 18652, 18654, 18655, 18659, 18663,
18667, 18673, 18684, 18695, 18702, 18705, 18712, 18724, 18731,
18745, 18759, 18764, 18765, 18767, 18775, 18779, 18780, 18782,
18785, 18787, 18788, 18793, 18794, 18800, 18803, 18822, 18836,
18843, 18844, 18858, 18859, 18869, 18871, 18876, 18877, 18878,
18883, 18886, 18890, 18892, 18894, 18898, 18900, 18912, 18928,
18932, 18933, 18936, 18946), class = "Date")), row.names = c(NA,
-113L), class = "data.frame")
HTH-
这里有一个使用 purrr::accumulate 的方法。
就像 @Alexlok 一样,我受到了之前这个答案的启发(谢谢 Rich!)。
accumulate 将一个带有两个参数(.x 和 .y)的函数应用于向量中的前一个值和下一个值。可以使用 .init = 定义初始值。
因此在第一次迭代中,.x = as.Date("2020-01-30") - 1,.y = 1(即 lengths 的第一个值)。
接下来,我们创建一个日期序列,从起始日期一直延伸到足够远的未来,使其中的周末和假期不至于多到让工作日不够用。然后,我们使用 timeDate::isBizday 函数来确定这些日期中哪些是工作日,并按所需的工作日数(.y)从中取出对应的日期。
然后,我们返回该日期,作为下一轮的 .x。对整个 lengths 向量重复这一过程。
accumulate 会把 .init 作为第一个值返回,因此我们可以用 [-1] 把它去掉。它还返回与 .x 相同的类型,因此我们需要用 as.Date 将结果转换回日期。
library(dplyr)
library(purrr)
library(timeDate)
# Build st_date cumulatively: each value is the previous st_date advanced by
# the current row's `lengths` business days.
as_tibble(tmp) %>%
  mutate(
    st_date = as.Date(
      accumulate(
        lengths,
        function(prev_date, n_days) {
          # Candidate calendar days following prev_date. 3 + 2 * n_days of them
          # are generated, intended to leave at least n_days business days once
          # non-business days are filtered out. Date + integer adds whole days,
          # so lubridate::days() (which this snippet never loads) is not needed.
          candidates <- prev_date + seq_len(3 + 2 * n_days)
          # NOTE(review): isBizday() is called with its default arguments, so
          # its default holiday calendar applies on top of weekends; confirm
          # that matches the intended definition of a business day.
          biz_dates <- candidates[isBizday(as.timeDate(candidates))]
          # The n_days-th business day becomes the next st_date.
          biz_dates[n_days]
        },
        # Start one day before 2020-01-30 so that the first row (lengths = 1
        # in the sample data) lands on 2020-01-30 itself.
        .init = as.Date("2020-01-30") - 1
      )[-1] # accumulate() returns .init as its first element; drop it
    )
  )
## A tibble: 113 x 3
# lengths values st_date
# <int> <fct> <date>
# 1 1 NA 2020-01-30
# 2 7 Up 2020-02-10
# 3 10 Down 2020-02-24
# 4 6 Up 2020-03-03
# 5 2 Down 2020-03-05