r-如何删除基于特定日期的特定值

下面是我的数据帧的一部分：

Year Date        Location Tag Section domsec atDominantLocation
2017 2017-08-13        a wnd       n      n                  1
2017 2017-08-14        a wnd       n      n                  1
2017 2017-08-15        a wnd       n      n                  1
2017 2017-08-16        a wnd       n      n                  1
2017 2017-08-17        a wnd       n      n                  1
2017 2017-08-18        a wnd       n      n                  1
2017 2017-08-19        a wnd       n      n                  1
2017 2017-08-20        a wnd       n      n                  1
2017 2017-08-21        a wnd       b      n                  0
2017 2017-08-21        a wnd       u      n                  0
2017 2017-08-23        a wnd       f      n                  0
2018 2018-08-21        a wnd       f      s                  0
2018 2018-08-18        a wnd       h      s                  0
2018 2018-08-19        a wnd       o      s                  0
2018 2018-08-15        a wnd       s      s                  1
2018 2018-08-17        a wnd       s      s                  1
2018 2018-08-14        c wnd      ss      s                  0
2018 2018-08-16        a wnd       t      s                  0
2018 2018-08-21        a wnd       t      s                  0
2018 2018-08-13        c wnd      ww      s                  0
2018 2018-08-20        a wnd       y      s                  0

列 atDominantLocation 包含 1 和 0。我想保留所有的 0，但对于 1 只保留日期最早的那一行。也就是说，每个 Tag 和 Year 的组合应该只有一个 1，并且这个 1 应该对应当年最早的日期。

以下是我想要的输出：

Year Date        Location Tag Section domsec atDominantLocation
2017 2017-08-13        a wnd       n      n                  1
2017 2017-08-21        a wnd       b      n                  0
2017 2017-08-21        a wnd       u      n                  0
2017 2017-08-23        a wnd       f      n                  0
2018 2018-08-21        a wnd       f      s                  0
2018 2018-08-18        a wnd       h      s                  0
2018 2018-08-19        a wnd       o      s                  0
2018 2018-08-15        a wnd       s      s                  1
2018 2018-08-14        c wnd      ss      s                  0
2018 2018-08-16        a wnd       t      s                  0
2018 2018-08-21        a wnd       t      s                  0
2018 2018-08-13        c wnd      ww      s                  0
2018 2018-08-20        a wnd       y      s                  0

我尝试过 duplicated 和 unique 函数，但都没有成功。谢谢你的帮助。

另一个 tidyverse 方案是：先用 filter 只保留 atDominantLocation 为 1 的行，然后按 Date 排序、按 Year 和 Tag 分组，再用 slice 取出每组日期最早的那一行。最后，把这些行与原始数据帧中 atDominantLocation 为 0 的行用 bind_rows 合并在一起。

library(tidyverse)
# Keep a single flagged (atDominantLocation == 1) row per Year/Tag pair, the
# one with the earliest Date, and every unflagged (== 0) row.
# Date is stored as ISO "YYYY-MM-DD" text here, so sorting it as character is
# also chronological.
df %>%
  filter(atDominantLocation == 1) %>%
  arrange(Date) %>%
  group_by(Year, Tag) %>%
  slice(1) %>%
  # Drop the grouping so the combined result is a plain, ungrouped tibble.
  ungroup() %>%
  # Add back the unflagged rows untouched, then restore date order.
  bind_rows(df %>% filter(atDominantLocation == 0)) %>%
  arrange(Date)

输出

Year Date       Location Tag   Section domsec atDominantLocation
<int> <chr>      <chr>    <chr> <chr>   <chr>               <int>
1  2017 2017-08-13 a        wnd   n       n                       1
2  2017 2017-08-21 a        wnd   b       n                       0
3  2017 2017-08-21 a        wnd   u       n                       0
4  2017 2017-08-23 a        wnd   f       n                       0
5  2018 2018-08-13 c        wnd   ww      s                       0
6  2018 2018-08-14 c        wnd   ss      s                       0
7  2018 2018-08-15 a        wnd   s       s                       1
8  2018 2018-08-16 a        wnd   t       s                       0
9  2018 2018-08-18 a        wnd   h       s                       0
10  2018 2018-08-19 a        wnd   o       s                       0
11  2018 2018-08-20 a        wnd   y       s                       0
12  2018 2018-08-21 a        wnd   f       s                       0
13  2018 2018-08-21 a        wnd   t       s                       0

数据

# Sample data: 21 observations of a single tag ("wnd") over two years.
# Date is deliberately kept as character, exactly as in the question.
df <- data.frame(
  Year = rep(c(2017L, 2018L), times = c(11L, 10L)),
  Date = c(
    "2017-08-13", "2017-08-14", "2017-08-15", "2017-08-16", "2017-08-17",
    "2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21", "2017-08-21",
    "2017-08-23",
    "2018-08-21", "2018-08-18", "2018-08-19", "2018-08-15", "2018-08-17",
    "2018-08-14", "2018-08-16", "2018-08-21", "2018-08-13", "2018-08-20"
  ),
  # Every row is at location "a" except rows 17 and 20.
  Location = replace(rep("a", 21L), c(17L, 20L), "c"),
  Tag = rep("wnd", 21L),
  Section = c(
    rep("n", 8L),
    "b", "u", "f", "f", "h", "o", "s", "s", "ss", "t", "t", "ww", "y"
  ),
  domsec = rep(c("n", "s"), times = c(11L, 10L)),
  atDominantLocation = rep(c(1L, 0L, 1L, 0L), times = c(8L, 6L, 2L, 5L)),
  stringsAsFactors = FALSE
)

我们可以按 "Year" 和 "Tag" 进行分组，并在 filter 中用逻辑表达式筛选行：分别构造两个逻辑表达式，再用 |（OR）连接。因为我们想保留所有值为 0 的行（atDominantLocation == 0），同时只保留 "atDominantLocation" 为 1 的行中 "Date" 最小的那一行（Date == min(Date[atDominantLocation == 1])）。

library(dplyr)
# Keep every row flagged 0 and, per Year/Tag group, exactly one row flagged 1:
# the one with the earliest Date.
df1 %>%
  # Parse the ISO "YYYY-MM-DD" strings so dates compare chronologically.
  mutate(Date = as.Date(Date)) %>%
  group_by(Year, Tag) %>%
  filter(
    atDominantLocation == 0 |
      # Mask the dates of non-1 rows with NA, then take the position of the
      # earliest remaining date. which.min() yields a single index, so tied
      # earliest dates keep only one row, and integer(0) when the group has
      # no 1s, so no warning is raised from min() on an empty vector.
      row_number() %in%
        which.min(replace(Date, !(atDominantLocation %in% 1), NA))
  ) %>%
  ungroup()

输出

# A tibble: 13 × 7
Year Date       Location Tag   Section domsec atDominantLocation
<int> <date>     <chr>    <chr> <chr>   <chr>               <int>
1  2017 2017-08-13 a        wnd   n       n                       1
2  2017 2017-08-21 a        wnd   b       n                       0
3  2017 2017-08-21 a        wnd   u       n                       0
4  2017 2017-08-23 a        wnd   f       n                       0
5  2018 2018-08-21 a        wnd   f       s                       0
6  2018 2018-08-18 a        wnd   h       s                       0
7  2018 2018-08-19 a        wnd   o       s                       0
8  2018 2018-08-15 a        wnd   s       s                       1
9  2018 2018-08-14 c        wnd   ss      s                       0
10  2018 2018-08-16 a        wnd   t       s                       0
11  2018 2018-08-21 a        wnd   t       s                       0
12  2018 2018-08-13 c        wnd   ww      s                       0
13  2018 2018-08-20 a        wnd   y       s                       0

数据

# Sample data for the dplyr answer: the same 21 rows as in the question.
# Date is left as character; the pipeline converts it with as.Date().
df1 <- data.frame(
  Year = rep(2017:2018, times = c(11L, 10L)),
  Date = c(
    "2017-08-13", "2017-08-14", "2017-08-15", "2017-08-16", "2017-08-17",
    "2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21", "2017-08-21",
    "2017-08-23",
    "2018-08-21", "2018-08-18", "2018-08-19", "2018-08-15", "2018-08-17",
    "2018-08-14", "2018-08-16", "2018-08-21", "2018-08-13", "2018-08-20"
  ),
  Location = c(rep("a", 16L), "c", "a", "a", "c", "a"),
  Tag = rep("wnd", 21L),
  Section = c(
    rep("n", 8L),
    "b", "u", "f", "f", "h", "o", "s", "s", "ss", "t", "t", "ww", "y"
  ),
  domsec = c(rep("n", 11L), rep("s", 10L)),
  # Eight 1s (2017), six 0s, two 1s (2018), five 0s.
  atDominantLocation = c(rep(1L, 8L), rep(0L, 6L), rep(1L, 2L), rep(0L, 5L)),
  stringsAsFactors = FALSE
)

library(data.table)
library(magrittr)
# setDT() converts df to a data.table by reference (no copy is made).
setDT(df)
# Keep one flagged (atDominantLocation == 1) row per Year/Tag, the one with
# the earliest Date, plus every unflagged (== 0) row.
rbindlist(
  list(
    # Sort the flagged rows by Date before taking the first row of each group,
    # so the result does not depend on the storage order of df. Date is ISO
    # "YYYY-MM-DD" text, so character order is chronological. Groups without
    # any flagged row contribute nothing (rather than an all-NA row).
    df[atDominantLocation == 1][
      order(Date), head(.SD, 1L), by = list(Year, Tag)
    ],
    df[atDominantLocation == 0]
  ),
  # The grouped result lists Year and Tag first, so match columns by name.
  use.names = TRUE
) %>%
  .[order(Tag, Date)]
#>     Year Tag       Date Location Section domsec atDominantLocation
#>  1: 2017 wnd 2017-08-13        a       n      n                  1
#>  2: 2017 wnd 2017-08-21        a       b      n                  0
#>  3: 2017 wnd 2017-08-21        a       u      n                  0
#>  4: 2017 wnd 2017-08-23        a       f      n                  0
#>  5: 2018 wnd 2018-08-13        c      ww      s                  0
#>  6: 2018 wnd 2018-08-14        c      ss      s                  0
#>  7: 2018 wnd 2018-08-15        a       s      s                  1
#>  8: 2018 wnd 2018-08-16        a       t      s                  0
#>  9: 2018 wnd 2018-08-18        a       h      s                  0
#> 10: 2018 wnd 2018-08-19        a       o      s                  0
#> 11: 2018 wnd 2018-08-20        a       y      s                  0
#> 12: 2018 wnd 2018-08-21        a       f      s                  0
#> 13: 2018 wnd 2018-08-21        a       t      s                  0

^{由 reprex 包 (v2.0.1) 创建于 2022-02-09}

数据

相关内容

最新更新

热门标签：