r-矢量化日期功能



我需要清理一系列大型dbs。其中一列是日期矢量。根据年份的不同,变量需要进行不同的清洁。

我写了一个函数,但它非常慢,尤其是因为我必须使用它rowwise()

我该如何做得更好?

这就是复杂的清洁功能:

cleandate_fn <- function(date,year){
if(year<=2010){
date = str_pad(date,8,"left","0") 
date = as.Date(date,format="%d%m%Y")
} else if (year==2011) {
date = str_pad(date,6,"left","0") 
date = ifelse(str_sub(date,1,2)=="02",paste0("20",str_sub(date,3,8)),date)
date = ifelse(str_sub(date,5,6)=="00",paste0(str_sub(date,1,4),"01",str_sub(date,7,8)),date)
date = gsub("\\|/","0",date)
date = ifelse(str_sub(date,5,6)=="00",paste0(str_sub(date,1,4),"01",str_sub(date,7,8)),date)
date = ifelse(str_sub(date,5,8) %in% c("0229","0230","0231"),paste0(str_sub(date,1,4),"0131"),date)
date = as.Date(date,format="%Y%m%d")
} else {
date = ifelse(str_sub(date,4,6)=="//1",paste0(str_sub(date,1,3),"/19",str_sub(date,7,8)),date)
date = ifelse(str_sub(date,4,6)=="//2",paste0(str_sub(date,1,3),"/20",str_sub(date,7,8)),date)
date = ifelse(str_sub(date,5,6)=="/9",paste0(str_sub(date,1,4),"19",str_sub(date,7,8)),date)    
date = ifelse(str_sub(date,5,6)=="/0",paste0(str_sub(date,1,4),"20",str_sub(date,7,8)),date)
date = gsub("\\|/","0",date)
date = ifelse(str_sub(date,1,2)=="00",paste0("01",str_sub(date,3,8)),date)
date = ifelse(str_sub(date,3,4)=="00",paste0(str_sub(date,1,2),"01",str_sub(date,5,8)),date)
date = paste0(str_sub(date,1,2),"05",str_sub(date,5,8))
date = as.Date(date,format="%d%m%Y")}
return(date)
}

玩具示例:

data <- data.frame(date=c("19052003","29062012","008//210","05/2/001","01011980"),
year=rep(2010,5)) 
data <- data %>% 
rowwise() %>% 
mutate(date=cleandate_fn(date,unique(data$year))) 

按"年份"进行分组,然后将"年份"的第一个元素作为函数的入口可能会更容易

library(dplyr)
df1 %>%
group_by(year) %>%
mutate(datenew = cleandate_fn(date, first(year))) %>%
ungroup

-输出

# A tibble: 5 × 3
date      year datenew   
<chr>    <dbl> <date>    
1 19052003  2010 2003-05-19
2 29062012  2010 2012-06-29
3 008//210  2010 NA        
4 05/2/001  2010 NA        
5 01011980  2010 1980-01-01

相关内容

  • 没有找到相关文章

最新更新