我的输入数据X
是一个矩阵的单行形式,每秒更新几次
# fake data
set.seed(123)
X <- matrix(rnorm(1),ncol = 10,nrow = 1)
我有一个模型的形式规则,许多规则,大约一千。为了不复制我的模型,我将创建一个假的。
模型中的规则可以是不同尺寸
fake_rules_model <- function(n=1000, ncolX){
idx <- function() sample(1:ncolX,n,replace = T)
val <- function() round(rnorm(n),2)
op <- function() sample(c("<=",">="),n,replace = T)
rules <- paste0("X[,",idx(),"]",op(),val()," & ","X[,",idx(),"]",op(),val())
return(rules)}
model <- fake_rules_model(ncolX = ncol(X))
我有一个执行模型model_execute
的函数
model_execute <- function(model){
res <- rep(F,length(model))
for(i in 1:length(model)) res[i] <- eval(str2lang( model[i] ))
return(res)
}
model_execute(model = model)
[1] FALSE TRUE FALSE TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE
[14] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[27] FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
[40] TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE
但是对我来说太慢了
问题:如何加速model_execute
函数
您可以尝试构造矩阵并比较它们:
f <- function(model){
m <- do.call(
rbind,
stringr::str_match_all(
model,
"(\[,([0-9]+)\](<=|>=)(-?[0-9]+\.?[0-9]*))+"
)
)
m <- m[,3:5]
X_m <- matrix(X[,as.integer(m[,1])], ncol = 2, byrow = TRUE)
val_m <- matrix(as.numeric(m[,3]), ncol = 2, byrow = TRUE)
op_m <- matrix(m[,2], ncol = 2, byrow = TRUE)
modify <- which(op_m == ">=")
X_m[modify] <- -1 * X_m[modify]
val_m[modify] <- -1 * val_m[modify]
apply(X_m <= val_m, 1, all)
}
> identical(f(model), model_execute(model))
[1] TRUE
> rbenchmark::benchmark(
+ model_execute = model_execute(model),
+ f = f(model)
+ )
test replications elapsed relative user.self sys.self user.child sys.child
2 f 100 0.33 1.000 0.33 0 NA NA
1 model_execute 100 4.27 12.939 4.25 0 NA NA
编辑
你可以泛化,但它会比第一个变体慢:
ff <- function(model){
lgl <- lapply(
stringr::str_match_all(
model,
"\[,([0-9]+)\](<=|>=)(-?[0-9]+\.?[0-9]*)"
),
function(x){
l_val <- X[,as.integer(x[,2])]
s <- x[,3]
r_val <- as.numeric(x[,4])
modify <- which(s == ">=")
l_val[modify] <- -l_val[modify]
r_val[modify] <- -r_val[modify]
all(l_val <=r_val)
}
)
unlist(lgl)
}
test replications elapsed relative user.self sys.self user.child sys.child
2 ff 100 0.67 1.000 0.67 0.00 NA NA
1 model_execute 100 4.34 6.478 4.32 0.02 NA NA