r-创建一个循环,用前导变量追加列



我的数据框架包含以下列:日期、id、类别。我想创建一个循环,它附加额外的列(category1、category2等(,其中包含按日期排列的每个id的前导类别。

我没有循环的代码如下:

df <- df %>%
dplyr::group_by(id) %>%
dplyr::mutate(category1 = dplyr::lead(category, n = 1L, order_by = date))
df <- df %>%
dplyr::group_by(id) %>%
dplyr::mutate(category2 = dplyr::lead(category, n = 2L, order_by = date))
df <- df %>%
dplyr::group_by(id) %>%
dplyr::mutate(category3 = dplyr::lead(category, n = 3L, order_by = date))
#and so on.

创建这个循环的最佳方式是什么?

谢谢!

我们可以使用purrr中的mappartial创建所需的lead函数,并使用mutate(across())模式同时应用它们。

library(purrr)
library(dplyr)
df %>% 
group_by(id) %>% 
mutate(across(
category, 
map(1:7, ~partial(lead, n = .x, order_by = date)),
.names = "{.col}{.fn}"
))
# A tibble: 100 × 10
# Groups:   id [4]
id category date       category1 category2 category3 category4 category5
<int> <chr>    <date>     <chr>     <chr>     <chr>     <chr>     <chr>    
1     1 B        2022-01-02 B         B         C         B         C        
2     1 B        2022-01-03 B         C         B         C         C        
3     1 B        2022-01-05 C         B         C         C         C        
4     1 C        2022-01-06 B         C         C         C         B        
5     1 B        2022-01-10 C         C         C         B         C        
6     1 C        2022-01-10 C         C         B         C         A        
7     1 C        2022-01-10 C         B         C         A         C        
8     1 C        2022-01-11 B         C         A         C         B        
9     1 B        2022-01-12 C         A         C         B         B        
10     1 C        2022-01-17 A         C         B         B         A        
11     1 A        2022-01-18 C         B         B         A         B        
12     1 C        2022-01-20 B         B         A         B         A        
13     1 B        2022-01-25 B         A         B         A         C        
14     1 B        2022-01-25 A         B         A         C         C        
15     1 A        2022-01-25 B         A         C         C         C        
16     1 B        2022-01-25 A         C         C         C         C        
17     1 A        2022-01-26 C         C         C         C         B        
18     1 C        2022-01-27 C         C         C         B         A        
19     1 C        2022-01-30 C         C         B         A         B        
20     1 C        2022-02-01 C         B         A         B         C        
21     1 C        2022-02-04 B         A         B         C         C        
22     1 B        2022-02-06 A         B         C         C         A        
23     1 A        2022-02-06 B         C         C         A         C        
24     1 B        2022-02-06 C         C         A         C         NA       
25     1 C        2022-02-11 C         A         C         NA        NA       
26     1 C        2022-02-12 A         C         NA        NA        NA       
27     1 A        2022-02-12 C         NA        NA        NA        NA       
28     1 C        2022-02-14 NA        NA        NA        NA        NA       
29     2 B        2022-01-06 B         C         C         A         A        
# … with 71 more rows, and 2 more variables: category6 <chr>, category7 <chr>

(其中df是定义为:的样本数据

n <- 100
set.seed(123) ; df <- data.frame(
id = sample(1:4, n, replace = TRUE),
category = sample(LETTERS[1:3], n, replace = TRUE),
date = sample(seq(as.Date("2022-01-01"), by = "1 day", length.out = 45), n,
replace = TRUE)
)
df <- df %>% arrange(id, date)

使用数据。表

# example data
dt <- mtcars[1:10, c("cyl", "disp", "mpg")]
setDT(dt)[ order(mpg), c("cat1", "cat2") := lapply(1:2, function(i) shift(disp, i, type = "lead")), by = cyl ]
dt
#     cyl  disp  mpg  cat1  cat2
#  1:   6 160.0 21.0 160.0 258.0
#  2:   6 160.0 21.0 258.0    NA
#  3:   4 108.0 22.8 140.8 146.7
#  4:   6 258.0 21.4    NA    NA
#  5:   8 360.0 18.7    NA    NA
#  6:   6 225.0 18.1 167.6 160.0
#  7:   8 360.0 14.3 360.0    NA
#  8:   4 146.7 24.4    NA    NA
#  9:   4 140.8 22.8 146.7    NA
# 10:   6 167.6 19.2 160.0 160.0

最新更新