
  • 本文关键字:单词 范围 话语 r regex
  • 更新时间 :
  • 英文 :


# Question tags to look for at the end of an utterance.
# The entries are joined with "|" into a regex alternation elsewhere in this
# script, so any regex metacharacter inside an entry would be interpreted as
# pattern syntax. Each tag is listed exactly once (a repeated "wouldn't you"
# was dropped: a duplicate alternative never changes what the regex matches).
tags <- c(
  "are you", "are they", "aren't they", "aren't you", "can I",
  "can't ya", "can't you", "could he", "could she", "could you",
  "could they", "didn't it", "didn't you", "didn't we", "didn't she",
  "didn't they", "did he", "did she", "did you", "do I", "do we",
  "do you", "do they", "do you know what I mean", "you know what I mean",
  "does it", "does he", "does she", "doesn't he", "doesn't she",
  "doesn't it", "dunnit", "don't ya", "don't you", "don't they",
  "has he", "has it", "hasn't he", "hasn't she", "have I", "have you",
  "have they", "haven't they", "haven't you", "haven't we", "huh",
  "innit", "is it", "is he", "is she", "is there", "isn't he",
  "isn't it", "isn't it sweetheart", "isn't she", "isn't there",
  "might'n we", "should you", "shouldn't you", "was it", "wasn't she",
  "wasn't he", "was she", "was he", "wasn't it", "weren't they",
  "will he", "will she", "will it", "will there", "will they",
  "would he", "would she", "would ya", "would you", "wouldn't you",
  "wouldn't it", "wouldn't they", "wouldn't she", "wouldn't he",
  "won't it", "won't you", "won't they", "won't he",
  "won't she", "won't we", "you know", "you think", "ain't they",
  "don't we", "did i"
)
# Sample utterances for exercising the question-tag pattern. The trailing
# note on each line gives the number of words that precede the question tag;
# only the utterances with 2 or 3 preceding words are meant to match.
tst <- c(
  "It's nice that length isn't it?",       # 4 words before the tag
  "that wee boy sleepwalks, doesn't he?",  # 4 words
  "well you know?",                        # 1 word
  "Sandy Row's isn't it?",                 # 2 words  <-- should match
  "Good this week, innit?",                # 3 words  <-- should match
  "in front of witnesses, don't you"       # 4 words
)


# First attempt at the pattern: arbitrary leading text, then 2-3 tokens (a run
# of non-space characters followed by whitespace or one of , . ! ?), then any
# question tag as a whole word, an optional closing punctuation mark, and the
# end of the string.
# Backslashes are doubled because an R string literal consumes one level of
# escaping: "\S" is not a valid R escape, and "\b" would be a backspace
# character rather than a regex word boundary.
# NOTE: the leading ".*" can absorb any number of extra words, so {2,3} only
# enforces a minimum of two tokens before the tag here, not a maximum.
patt_tag <- paste0(
  ".*(?:\\S+[\\s,.!?]){2,3}\\b(",
  paste0(tags, collapse = "|"),
  ")\\b(\\.|\\?|!|,)?$"
)

# Keep the utterances that match; perl = TRUE selects the PCRE engine.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
tst[grepl(patt_tag, tst, perl = TRUE)]
#[1] "It's nice that length isn't it?"      "that wee boy sleepwalks, doesn't he?" "Sandy Row's isn't it?"               
#[4] "Good this week, innit?"               "in front of witnesses, don't you" 


"Sandy Row's isn't it?" "Good this week, innit?"


将 patt_tag <- paste0(".*(?:… 开头的 ".*" 改为 "^",即写成 patt_tag <- paste0("^(?:…。这样模式从字符串开头锚定,问句标签之前只允许出现 2 到 3 个词,而不是任意多个词。

# Anchored pattern: starting at the beginning of the string, exactly 2-3
# tokens (a run of non-space characters followed by whitespace or one of
# , . ! ?), then any question tag as a whole word, an optional closing
# punctuation mark, and the end of the string. Anchoring with "^" is what
# turns {2,3} into a real upper bound on the words preceding the tag.
# Backslashes are doubled because an R string literal consumes one level of
# escaping: "\S" is not a valid R escape, and "\b" would be a backspace
# character rather than a regex word boundary.
patt_tag <- paste0(
  "^(?:\\S+[\\s,.!?]){2,3}\\b(",
  paste0(tags, collapse = "|"),
  ")\\b(\\.|\\?|!|,)?$"
)
# perl = TRUE selects the PCRE engine; TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
tst[grepl(patt_tag, tst, perl = TRUE)]
#[1] "Sandy Row's isn't it?"  "Good this week, innit?"
