在R中使用data.table根据多个条件创建一列



我希望使用data.table在另外两个列条件的基础上创建一个新列

# Reproducible example data: 12 rows in four groups of three.
# data.table has to be attached before as.data.table() can be called.
library(data.table)

group <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)
date <- c(6, 2, 3, 7, 6, 9, 7, 1, 4, 6, 8, 9)
# val1 is either "A" or the empty string.
val1 <- c("", "A", "A", "", "A", "A", "", "A", "A", "", "A", "A")
df1 <- data.frame(group, date, val1)
dt1 <- as.data.table(df1)

这是输出:

group date val1
1    6     
1    2    A
1    3    A
2    7     
2    6    A
2    9    A
3    7     
3    1    A
3    4    A
4    6     
4    8    A
4    9    A

我想在每组(1、2、3、4)中,找到val1 == "A"的行里date的最小值,如下所示:

group date val1 findmin
1    6             
1    2    A       Y
1    3    A        
2    7             
2    6    A       Y
2    9    A        
3    7             
3    1    A       Y
3    4    A        
4    6             
4    8    A       Y
4    9    A        

我试过

# Failing attempt, kept verbatim: the parenthesis after min(...) closes ifelse()
# too early, so "Y", "" and by = group are passed to `[.data.table` instead of
# ifelse(), and `date=` uses `=` where the comparison `==` is needed.
dt1[,findmin:= ifelse(date=min(date[val1 == "A"])),"Y","", by = group]

读作:如果date等于val1 == "A"的行中date的最小值,则在名为"findmin"的新列中放一个"Y",否则不放任何内容,并对每组(1、2、3、4)执行此操作。我得到这个错误:

Error in `[.data.table`(dt1, , `:=`(findmin, ifelse(min(date[val1 == "A"]))),  : 
Provide either by= or keyby= but not both

我很感激你的帮助,谢谢!

您必须小心使用括号,并使用==来检查相等性:

# For each group, put "Y" in findmin on the row(s) whose date equals the
# smallest date among that group's val1 == "A" rows, and "" everywhere else.
# The val1 == "A" guard keeps a non-"A" row from being flagged when its date
# happens to tie with that minimum.
# `:=` adds the column to dt1 by reference; by = group evaluates per group.
dt1[, findmin := fifelse(
  val1 == "A" & date == min(date[val1 == "A"]), "Y", ""
), by = group]

此代码使用dplyr工作。我相信还有一种更优雅的方法可以做到这一点。

# dplyr version: flag, for every group, the row with the smallest date among
# the val1 == "A" rows (filter -> first row per group -> join back -> fill).

# Install a package only when it is missing. requireNamespace() tests for
# availability without attaching; library() then attaches the package and
# raises an error if it still cannot be loaded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)

# Example data: 12 rows in four groups of three; val1 is "A" or "".
group <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)
date <- c(6, 2, 3, 7, 6, 9, 7, 1, 4, 6, 8, 9)
val1 <- c("", "A", "A", "", "A", "A", "", "A", "A", "", "A", "A")
df1 <- data.frame(group, date, val1)
dt1 <- as.data.table(df1)

# Keep only the rows where val1 is "A".
df2 <- df1 %>%
  filter(val1 == "A")

# Per group: order by date, keep the first (smallest-date) row, then tag it.
df3 <- df2 %>%
  group_by(group) %>%
  arrange(date) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(findmin = "Y")

# Join the tagged rows back onto the original data; rows without a match
# get NA in findmin.
df4 <- df1 %>%
  left_join(df3, by = c("group", "date", "val1"))

# Replace the NA values left by the join with an empty string.
df5 <- df4 %>%
  mutate(findmin = ifelse(is.na(findmin), "", findmin))

# Print the result.
df5
group date val1 findmin
1      1    6             
2      1    2    A       Y
3      1    3    A        
4      2    7             
5      2    6    A       Y
6      2    9    A        
7      3    7             
8      3    1    A       Y
9      3    4    A        
10     4    6             
11     4    8    A       Y
12     4    9    A        

随机数据测试

# Randomized check: sample 100% of df1's rows (size = 1), i.e. the same rows
# in random order, and display the shuffled data.
df6 <- df1 %>% sample_frac(size = 1)
df6
group date val1
1      3    4    A
2      3    1    A
3      4    8    A
4      4    6     
5      4    9    A
6      2    9    A
7      2    7     
8      3    7     
9      1    3    A
10     1    6     
11     2    6    A
12     1    2    A
# From the shuffled data keep the "A" rows, order them by date and take the
# first row of every group (its smallest date); tag those rows with "Y".
df6 <- df6 %>%
  filter(val1 == "A") %>%
  arrange(date) %>%
  group_by(group) %>%
  slice(1L) %>%
  ungroup() %>%
  mutate(findmin = "Y")

# Attach the tags to the original rows, blank out rows without a match, and
# sort the result for display.
df7 <- df1 %>%
  left_join(df6, by = c("group", "date", "val1")) %>%
  mutate(findmin = replace(findmin, is.na(findmin), "")) %>%
  arrange(group, val1, date, findmin)
df7
group date val1 findmin
1      1    6             
2      1    2    A       Y
3      1    3    A        
4      2    7             
5      2    6    A       Y
6      2    9    A        
7      3    7             
8      3    1    A       Y
9      3    4    A        
10     4    6             
11     4    8    A       Y
12     4    9    A     

另一种写法:用which.min代替arrange和slice

# Same randomized check, but the minimum-date row of each group is selected
# directly with which.min() instead of sorting and taking the first row.
df6 <- df1 %>% sample_frac(size = 1)
df6

# Keep the "A" rows, pick each group's minimum-date row and tag it with "Y".
df6 <- df6 %>%
  filter(val1 == "A") %>%
  group_by(group) %>%
  slice(which.min(date)) %>%
  ungroup() %>%
  mutate(findmin = "Y")

# Attach the tags to the original rows, blank out rows without a match, and
# sort the result for display.
df7 <- df1 %>%
  left_join(df6, by = c("group", "date", "val1")) %>%
  mutate(findmin = replace(findmin, is.na(findmin), "")) %>%
  arrange(group, val1, date, findmin)
df7
group date val1 findmin
1      1    6             
2      1    2    A       Y
3      1    3    A        
4      2    7             
5      2    6    A       Y
6      2    9    A        
7      3    7             
8      3    1    A       Y
9      3    4    A        
10     4    6             
11     4    8    A       Y
12     4    9    A        

最新更新