R语言 - 将"填充"(fill)限制到模式的最后一次出现为止



我有这样的数据:

# Example data: 14 numbered lines ("001".."014"), one utterance per line.
# The compact row-name form c(NA, 14L) is kept exactly as in the original.
df <- structure(
  list(
    line = sprintf("%03d", 1:14),
    utterance = c(
      "((m: both hands",
      "((m: both hands",
      "((i: DH=1, SZ=0",
      "((i: DH=1, SZ=0",
      "((s: Preface))",
      "((m: both hands",
      "((m: both hands clasped",
      "((m: both hands clasped",
      "((s: Background))",
      "((m: enumerating",
      "((m: enumerating",
      "((s: End))",
      "((i: DH=1, SZ=0",
      "((m: relax gesture))"
    )
  ),
  row.names = c(NA, 14L),
  class = "data.frame"
)

我想创建一个新的列story,并用utterance列中与正则表达式模式\(\(s匹配的值来填充(fill)该列。但我希望填充在与该模式匹配的最后一个值处停止,即((s: End))。

fill命令不会在该模式处停止——如何让fill在该模式处停止?

library(dplyr)  # mutate()
library(tidyr)  # fill()

# Copy the utterance into `story` where it matches the regex \(\(s (the
# backslashes must be doubled inside an R string literal), NA elsewhere,
# then carry each value downwards. fill() keeps filling through the last
# row; it does not stop at the final match.
df %>%
  mutate(story = ifelse(grepl("\\(\\(s", utterance), utterance, NA)) %>%
  fill(story, .direction = "down")
line               utterance             story
1   001         ((m: both hands              <NA>
2   002         ((m: both hands              <NA>
3   003         ((i: DH=1, SZ=0              <NA>
4   004         ((i: DH=1, SZ=0              <NA>
5   005          ((s: Preface))    ((s: Preface))
6   006         ((m: both hands    ((s: Preface))
7   007 ((m: both hands clasped    ((s: Preface))
8   008 ((m: both hands clasped    ((s: Preface))
9   009       ((s: Background)) ((s: Background))
10  010        ((m: enumerating ((s: Background))
11  011        ((m: enumerating ((s: Background))
12  012              ((s: End))        ((s: End))
13  013         ((i: DH=1, SZ=0        ((s: End))
14  014    ((m: relax gesture))        ((s: End))

期望

line               utterance             story
1   001         ((m: both hands              <NA>
2   002         ((m: both hands              <NA>
3   003         ((i: DH=1, SZ=0              <NA>
4   004         ((i: DH=1, SZ=0              <NA>
5   005          ((s: Preface))    ((s: Preface))
6   006         ((m: both hands    ((s: Preface))
7   007 ((m: both hands clasped    ((s: Preface))
8   008 ((m: both hands clasped    ((s: Preface))
9   009       ((s: Background)) ((s: Background))
10  010        ((m: enumerating ((s: Background))
11  011        ((m: enumerating ((s: Background))
12  012              ((s: End))        ((s: End))
13  013         ((i: DH=1, SZ=0              <NA>
14  014    ((m: relax gesture))              <NA>

tidyr::fill本身不会这样做,但您可以再添加一个mutate

# 1. Keep the utterance where it matches the regex \(\(s (backslashes are
#    doubled inside an R string literal), NA_character_ elsewhere.
# 2. Carry each value down over the following NA rows.
# 3. For rows equal to the last filled value, set every repeat to NA;
#    duplicated() is FALSE for the first occurrence, so that row is kept.
df %>%
  mutate(
    story = if_else(grepl("\\(\\(s", utterance), utterance, NA_character_)
  ) %>%
  fill(story, .direction = "down") %>%
  mutate(
    story = if_else(
      story == last(story) & duplicated(story),
      NA_character_,
      story
    )
  )
#    line               utterance             story
# 1   001         ((m: both hands              <NA>
# 2   002         ((m: both hands              <NA>
# 3   003         ((i: DH=1, SZ=0              <NA>
# 4   004         ((i: DH=1, SZ=0              <NA>
# 5   005          ((s: Preface))    ((s: Preface))
# 6   006         ((m: both hands    ((s: Preface))
# 7   007 ((m: both hands clasped    ((s: Preface))
# 8   008 ((m: both hands clasped    ((s: Preface))
# 9   009       ((s: Background)) ((s: Background))
# 10  010        ((m: enumerating ((s: Background))
# 11  011        ((m: enumerating ((s: Background))
# 12  012              ((s: End))        ((s: End))
# 13  013         ((i: DH=1, SZ=0              <NA>
# 14  014    ((m: relax gesture))              <NA>

这将查找最后一次出现的story并删除除第一个之外的所有匹配项。这假设顺序很重要,并且假设最后一个story必须包含文字s: End,尽管您可以根据需要相应地更新逻辑。

仅供参考,我从ifelse更改为if_else,因为它是类型安全的(base::ifelse不是)。它需要具体说明使用哪种NA(有超过六种不同的变体)。

我们可以使用zoo包中的na.locf0

library(dplyr)
library(zoo)

df %>%
  mutate(
    # Utterance where it matches the regex \(\(s (backslashes are doubled
    # inside an R string literal), NA elsewhere.
    story = ifelse(grepl("\\(\\(s", utterance), utterance, NA),
    # Position of the first element exactly equal to "((s: End))".
    ind = match("((s: End))", story),
    # Forward-fill NAs only within rows 1..ind; rows after ind keep NA.
    story = replace(
      story,
      seq_len(first(ind)),
      zoo::na.locf0(story[seq_len(first(ind))])
    ),
    # Remove the helper column again.
    ind = NULL
  )

-输出

line               utterance             story
1   001         ((m: both hands              <NA>
2   002         ((m: both hands              <NA>
3   003         ((i: DH=1, SZ=0              <NA>
4   004         ((i: DH=1, SZ=0              <NA>
5   005          ((s: Preface))    ((s: Preface))
6   006         ((m: both hands    ((s: Preface))
7   007 ((m: both hands clasped    ((s: Preface))
8   008 ((m: both hands clasped    ((s: Preface))
9   009       ((s: Background)) ((s: Background))
10  010        ((m: enumerating ((s: Background))
11  011        ((m: enumerating ((s: Background))
12  012              ((s: End))        ((s: End))
13  013         ((i: DH=1, SZ=0              <NA>
14  014    ((m: relax gesture))              <NA>

相关内容

最新更新