将 df 作为:
ID Status Created_Date Booking_Date Price_Booking
1 Confirmed "2013-03-01" "2013-08-21" 400
1 Confirmed "2013-03-01" "2013-10-01" 350
2 Confirmed "2013-04-11" "2013-10-01" 299
2 Confirmed "2013-04-11" "2013-10-01" 178
3 Cancelled "2013-02-21" "2014-04-01" 99
4 Confirmed "2013-08-30" "2013-10-01" 525
5 Confirmed "2014-01-01" "2014-12-01" 439
6 Confirmed "2015-02-22" "2015-11-18" 200
6 Confirmed "2015-07-13" "2017-04-09" 100
想要根据变量计算第一年内每个客户的收入Created_Date。
我试过了:
with(df$ID[df$Status=="Confirmed" & format(as.Date(df$Created_Date), "%Y") == 2013 & format(as.Date(df$Booking_Date), "%Y") == 2013]))
然而,这只计算每个日历年的收入,我想让它相对于Created_Date
预期输出为:
ID Sum_Price_Booking
1 750
2 477
3 NA
4 525
5 439
6 200
我们可以为那些在Booking_Date
和Created_Date
之间相差小于 1 年的值group_by
ID
和sum
Price_Booking
值。
library(dplyr)
df %>%
mutate_at(vars(ends_with("Date")), as.Date) %>%
group_by(ID) %>%
summarise(sum = sum(Price_Booking[Booking_Date - Created_Date < 365]))
# ID sum
# <int> <int>
#1 1 750
#2 2 477
#3 3 0
#4 4 525
#5 5 439
#6 6 200
数据
df <- structure(list(ID = c(1L, 1L, 2L, 2L, 3L, 4L, 5L, 6L, 6L),
Status = structure(c(2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L),
.Label = c("Cancelled", "Confirmed"), class = "factor"),
Created_Date = structure(c(2L, 2L, 3L, 3L, 1L, 4L, 5L, 6L, 7L),
.Label = c("2013-02-21", "2013-03-01", "2013-04-11", "2013-08-30", "2014-01-01",
"2015-02-22", "2015-07-13"), class = "factor"), Booking_Date =
structure(c(1L, 2L, 2L, 2L, 3L, 2L, 4L, 5L, 6L),
.Label = c("2013-08-21", "2013-10-01", "2014-04-01", "2014-12-01", "2015-11-18",
"2017-04-09"), class = "factor"), Price_Booking = c(400L, 350L, 299L, 178L, 99L,
525L, 439L,200L, 100L)), class = "data.frame", row.names = c(NA, -9L))
您可以使用 data.table 的方式 by = 您可以选择聚合
library(data.table)
library(lubridate)
dt <- data.table(
ID = c(1, 1, 2, 2, 3, 4, 5, 6, 6),
Status = c(
'Confirmed',
'Confirmed',
'Confirmed',
'Confirmed',
'Cancelled',
'Confirmed',
'Confirmed',
'Confirmed',
'Confirmed'
),
Created_Date = as.Date(
c(
"2013-03-01",
"2013-03-01",
"2013-04-11",
"2013-04-11",
"2013-02-21",
"2013-08-30",
"2014-01-01",
"2015-02-22",
"2015-07-13"
)
),
Booking_Date = as.Date(
c(
"2013-08-21",
"2013-10-01",
"2013-10-01",
"2013-10-01",
"2014-04-01",
"2013-10-01",
"2014-12-01",
"2015-11-18",
"2017-04-09"
)
),
Price_Booking = c(400,
350,
299,
178,
99,
525,
439,
200,
100)
)
dt[Status == 'Confirmed', .(price_sum = sum(Price_Booking)), by = .(Year = year(Created_Date), ID)]