有人知道如何执行以下操作吗:
我有一个数据框(见下面的示例),每个 match_id 由两支球队组成,并包含球队的阵型和动作(例如传球和铲球)。我想创建一个变量 "home_team",在出现新的 match_id 时捕获每场比赛第一支球队的阵型。在该示例中,这将分别是 433、4222 和(再次)433。
# Example event log: 22 rows spread over three matches (8, 12 and 2 rows).
# Each match opens with two "STARTING XI" rows, one per team, which are the
# only rows carrying a formation; every other row has NA in that column.
dat <- data.frame(
  match_id = rep(c(1, 2, 3), times = c(8, 12, 2)),
  team_name = c(
    # match 1
    "Barcelona", "Valencia", "Barcelona", "Barcelona",
    "Barcelona", "Barcelona", "Barcelona", "Barcelona",
    # match 2
    "Getafe", "Barcelona", "Getafe", "Getafe", "Getafe", "Barcelona",
    "Getafe", "Getafe", "Getafe", "Barcelona", "Barcelona", "Barcelona",
    # match 3
    "Real Madrid", "Barcelona"
  ),
  tactics.formation = c(
    433, 442, rep(NA, 6),
    4222, 433, rep(NA, 10),
    433, 4132
  ),
  id = seq_len(22),
  actions = c(
    # match 1
    "STARTING XI", "STARTING XI", "pass", "pass",
    "pass", "pass", "dribble", "dribble",
    # match 2
    "STARTING XI", "STARTING XI", "pass", "pass", "pass", "pass",
    "dribble", "dribble", "tackle", "pass", "dribble", "pass",
    # match 3
    "STARTING XI", "STARTING XI"
  )
)
dat
match_id team_name tactics.formation id actions
1 1 Barcelona 433 1 STARTING XI
2 1 Valencia 442 2 STARTING XI
3 1 Barcelona NA 3 pass
4 1 Barcelona NA 4 pass
5 1 Barcelona NA 5 pass
6 1 Barcelona NA 6 pass
7 1 Barcelona NA 7 dribble
8 1 Barcelona NA 8 dribble
9 2 Getafe 4222 9 STARTING XI
10 2 Barcelona 433 10 STARTING XI
11 2 Getafe NA 11 pass
12 2 Getafe NA 12 pass
13 2 Getafe NA 13 pass
14 2 Barcelona NA 14 pass
15 2 Getafe NA 15 dribble
16 2 Getafe NA 16 dribble
17 2 Getafe NA 17 tackle
18 2 Barcelona NA 18 pass
19 2 Barcelona NA 19 dribble
20 2 Barcelona NA 20 pass
21 3 Real Madrid 433 21 STARTING XI
22 3 Barcelona 4132 22 STARTING XI
现在我想创建一个新的列,用来标记一支球队是否在主场比赛。每当出现新的 match_id 时,该行(即该场比赛的第一行)的球队被视为主队。然而,我真的不知道如何迭代。
最终的结果应该是这样的:
# Expected result: the example event log plus a `home_team` flag.
# `home_team` is TRUE on every row whose team is the one listed first for
# that match_id. It is stored as a logical vector (not the strings
# "TRUE"/"FALSE") so it has the same type as the result of a comparison such
# as `team_name == <home team name>` and can be used directly for subsetting.
dat_new <- data.frame(
  match_id = c(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3),
  team_name = c(
    "Barcelona", "Valencia", "Barcelona", "Barcelona", "Barcelona",
    "Barcelona", "Barcelona", "Barcelona", "Getafe", "Barcelona",
    "Getafe", "Getafe", "Getafe", "Barcelona", "Getafe", "Getafe", "Getafe",
    "Barcelona", "Barcelona", "Barcelona", "Real Madrid", "Barcelona"
  ),
  tactics.formation = c(
    433, 442, NA, NA, NA, NA, NA, NA, 4222, 433, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, NA, 433, 4132
  ),
  # seq_len() replaces the redundant seq(1:22); both yield the integers 1..22.
  id = seq_len(22),
  actions = c(
    "STARTING XI", "STARTING XI", "pass", "pass", "pass", "pass", "dribble",
    "dribble", "STARTING XI", "STARTING XI",
    "pass", "pass", "pass", "pass", "dribble", "dribble", "tackle", "pass",
    "dribble", "pass", "STARTING XI", "STARTING XI"
  ),
  home_team = c(
    TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,
    TRUE, FALSE, TRUE,
    TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE
  )
)
dat_new
match_id team_name tactics.formation id actions home_team
1 1 Barcelona 433 1 STARTING XI TRUE
2 1 Valencia 442 2 STARTING XI FALSE
3 1 Barcelona NA 3 pass TRUE
4 1 Barcelona NA 4 pass TRUE
5 1 Barcelona NA 5 pass TRUE
6 1 Barcelona NA 6 pass TRUE
7 1 Barcelona NA 7 dribble TRUE
8 1 Barcelona NA 8 dribble TRUE
9 2 Getafe 4222 9 STARTING XI TRUE
10 2 Barcelona 433 10 STARTING XI FALSE
11 2 Getafe NA 11 pass TRUE
12 2 Getafe NA 12 pass TRUE
13 2 Getafe NA 13 pass TRUE
14 2 Barcelona NA 14 pass FALSE
15 2 Getafe NA 15 dribble TRUE
16 2 Getafe NA 16 dribble TRUE
17 2 Getafe NA 17 tackle TRUE
18 2 Barcelona NA 18 pass FALSE
19 2 Barcelona NA 19 dribble FALSE
20 2 Barcelona NA 20 pass FALSE
21 3 Real Madrid 433 21 STARTING XI TRUE
22 3 Barcelona 4132 22 STARTING XI FALSE
有谁能帮忙吗?:(
您可以将 match_id 向后移动一位,再与原来的 match_id 逐行比较(例如相减),从而得到 new_match 索引:
# Flag the first row of every run of identical match_id values.
# Each id is compared with the id on the previous row; the first row has no
# predecessor and is always flagged. Comparing with `!=` instead of
# subtracting keeps this correct when the first id is 0 and also works for
# character or factor ids. The trailing `[seq_along(ids)]` makes the result
# logical(0) when `dat` has no rows.
new_match <- local({
  ids <- dat$match_id
  c(TRUE, ids[-1] != head(ids, -1))[seq_along(ids)]
})
# [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
# [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
在每个 new_match 为 TRUE 的行上,将 home_team 设置为该行的 team_name,然后使用 na.locf() 向下填充缺失值:
library(zoo)
# Store the name of each match's first-listed team on every row of that match.
# Rows that start a match keep their own team_name; all other rows start as
# NA and are filled with the last non-missing value above them.
# - as.character() / NA_character_ keep the result a character vector even if
#   team_name is a factor.
# - na.rm = FALSE stops na.locf() from dropping leading NAs, which would
#   shorten the vector and make the column assignment fail.
dat$home_team <- na.locf(
  ifelse(new_match, as.character(dat$team_name), NA_character_),
  na.rm = FALSE
)
# match_id team_name tactics.formation id actions home_team
# 1 1 Barcelona 433 1 STARTING XI Barcelona
# 2 1 Valencia 442 2 STARTING XI Barcelona
# 3 1 Barcelona NA 3 pass Barcelona
# ...
# 19 2 Barcelona NA 19 dribble Getafe
# 20 2 Barcelona NA 20 pass Getafe
# 21 3 Real Madrid 433 21 STARTING XI Real Madrid
# 22 3 Barcelona 4132 22 STARTING XI Real Madrid
然后检查 team_name 是否与 home_team 匹配:
# Overwrite home_team in place: the column held a team name and now becomes
# the element-wise result of comparing each row's team_name against it.
dat[["home_team"]] <- dat[["team_name"]] == dat[["home_team"]]
# match_id team_name tactics.formation id actions home_team
# 1 1 Barcelona 433 1 STARTING XI TRUE
# 2 1 Valencia 442 2 STARTING XI FALSE
# 3 1 Barcelona NA 3 pass TRUE
# 4 1 Barcelona NA 4 pass TRUE
# 5 1 Barcelona NA 5 pass TRUE
# 6 1 Barcelona NA 6 pass TRUE
# 7 1 Barcelona NA 7 dribble TRUE
# 8 1 Barcelona NA 8 dribble TRUE
# 9 2 Getafe 4222 9 STARTING XI TRUE
# 10 2 Barcelona 433 10 STARTING XI FALSE
# 11 2 Getafe NA 11 pass TRUE
# 12 2 Getafe NA 12 pass TRUE
# 13 2 Getafe NA 13 pass TRUE
# 14 2 Barcelona NA 14 pass FALSE
# 15 2 Getafe NA 15 dribble TRUE
# 16 2 Getafe NA 16 dribble TRUE
# 17 2 Getafe NA 17 tackle TRUE
# 18 2 Barcelona NA 18 pass FALSE
# 19 2 Barcelona NA 19 dribble FALSE
# 20 2 Barcelona NA 20 pass FALSE
# 21 3 Real Madrid 433 21 STARTING XI TRUE
# 22 3 Barcelona 4132 22 STARTING XI FALSE