r语言 - 调整使用数据的代码.表函数



下面的代码使用data.table函数生成一个输出表。然而,我想知道是否有可能以某种方式优化代码,仍然得到相同的结果?我们的想法是减少代码以减少处理时间。

library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
df1 <- structure(
list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28","2021-06-28",
"2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-06-25","2021-06-25","2021-06-27","2021-07-07","2021-07-07","2021-07-09","2021-07-09","2021-07-09"),
Code = c("FDE","ABC","ABC","ABC","CDE","FGE","ABC","CDE"),
Week= c("Wednesday","Wednesday","Friday","Wednesday","Wednesday","Friday","Friday","Friday"),
DR1 = c(4,1,4,3,6,4,3,5),
DR01 = c(4,1,4,3,3,4,3,6), DR02= c(4,2,6,7,3,2,7,4),DR03= c(9,5,4,3,3,2,1,5),
DR04 = c(5,4,3,3,6,2,1,9),DR05 = c(5,4,5,3,6,2,1,9),
DR06 = c(2,4,3,3,5,6,7,8),DR07 = c(2,5,4,4,9,4,7,8),
DR08 = c(4,0,0,1,2,4,4,4),DR09 = c(2,5,4,4,9,4,7,8),DR010 = c(2,5,4,4,9,4,7,8),DR011 = c(4,7,3,2,2,7,7,7), 
DR012 = c(4,4,2,3,0,4,4,5),DR013 = c(4,4,1,4,0,3,2,0),DR014 = c(0,3,1,2,0,2,NA,NA)),
class = "data.frame", row.names = c(NA, -8L))
selection = startsWith(names(df1), "DR0")
df1[selection][is.na(df1[selection])] = 0
dt1 <- as.data.table(df1)
cols <- grep("^DR0", colnames(dt1), value = TRUE)
medi_ana <- 
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Code, Week), .SDcols = paste0(cols, "_PV") ]
f1 <- function(nm, pat) grep(pat, nm, value = TRUE)
nm1 <- f1(names(df1), "^DR0\d+$")
nm2 <- f1(names(medi_ana), "_PV")
nm3 <- paste0("i.", nm2)
setDT(df1)[medi_ana,  (nm2) := Map(`+`, mget(nm1), mget(nm3)), on = .(Code, Week)]
SPV1 <- df1[, c('date1', 'date2', 'Code', 'Week', nm2), with = FALSE]
dmda<-"2021-07-09"
code<-"CDE"
SPV2<-melt(SPV1[date2 == dmda & Code == code][, 
lapply(.SD, sum, na.rm = TRUE), by = Code, 
.SDcols = patterns("^DR0")],
id.var = "Code", variable.name = "name", value.name = "val")[, 
name := readr::parse_number(as.character(name))][]
> SPV2
Code name val
1:  CDE    1   5
2:  CDE    2   5
3:  CDE    3   5
4:  CDE    4   5
5:  CDE    5   5
6:  CDE    6   5
7:  CDE    7   5
8:  CDE    8   5
9:  CDE    9   5
10:  CDE   10   5
11:  CDE   11   5
12:  CDE   12   5
13:  CDE   13   5
14:  CDE   14   5
result <- SPV2 %>% 
group_by(Code) %>% 
slice((as.Date(dmda) - min(as.Date(df1$date1) [
df1$Code == first(Code)])):max(name)+1) %>%
ungroup
> result
# A tibble: 3 x 3
Code   name   val
<chr> <dbl> <dbl>
1 CDE      12     5
2 CDE      13     5
3 CDE      14     5

dplyr代码可转换为data.table代码

SPV2[na.omit(SPV2[, .I[(as.Date(dmda) - min(as.Date(df1$date1) [ 
df1$Code == first(Code)])):max(name)+1], .(Code)]$V1)]

与产出

Code  name   val
<char> <num> <num>
1:    CDE    12     5
2:    CDE    13     5
3:    CDE    14     5

最新更新