r语言 - 通过识别速度函数的瓶颈来改进速度函数



我希望这个问题不要被认为太宽泛。然而,我不知道如何以不同的方式问它:在他的youtube视频中,Ed Boone用r介绍了ABM,他编写了以下代码(只有一些变量名称被更改)。代码在1000个观察值下运行良好,但是当我增加观察值时,它变得非常慢。因此,我想提高函数的速度。我可以在每行之后手动添加start.time_x <- Sys.time()等(看看需要花费很多时间),但我想知道是否有更好的方法来识别瓶颈,因为所有嵌套的for循环使得甚至相当复杂(因为我还必须考虑每个代码运行多少次):

# ABM_Covid - II
start.time <- Sys.time()
Data_Generator <- function(nPop1, E0, I0) {
# Create a population of susceptibles
Data <- data.frame( AgentNo=1:nPop1,
State="Susceptible",
Mixing= runif(nPop1,0,1),
TimeE = 0,
TimeI = 0,
stringsAsFactors = FALSE)
Data$State[1:E0] <- "Exposed"  # This just says that the first person is exposed, since the mixing is random anyway, this is not an issue (because the mixing of Exposed is random)
Data$Time[1:E0] <- rbinom(E0, 13, 0.5) + 1 # Exposure up to 14 days
Data$State[(E0+1):(E0+I0)] <- "Infected"
Data$Time[(E0+1):(E0+I0)] <- rbinom(I0, 12, 0.5)
return(Data)
}
ABM_Covid <- function(Data, parameters, runtime){
nPop1 <- nrow(Data)
# runtime <- 15
Results <- data.frame(  Susceptible     = rep (0, runtime),
Exposed         = rep (0, runtime),
Infected        = rep (0, runtime),
Recovered       = rep (0, runtime),
Deaths          = rep (0, runtime))
# Move people through time
for (k in 1:runtime){
# Moving people through time
StateSusceptible <- (1:nPop1)[Data$State == "Susceptible"]
StateSusceptible_or_Exposed <- (1:nPop1)[Data$State == "Susceptible" | Data$State == "Exposed"]
for (i in StateSusceptible) {
# Determine if they like to meet others
Mix1 <- Data$Mixing[i]
# How many agents will they meet? The plus one meets everybody meets somebody
Meetings <- round(Mix1*parameters$MaxMix,0) + 1
# Grab the agents they will meet
People_met <- sample(StateSusceptible_or_Exposed, Meetings, replace=TRUE, prob = Data$Mixing[StateSusceptible_or_Exposed])
for (j in 1:length(People_met)) {
# Grab who they will meet
Meetingsa <- Data[People_met[j], ]
# If exposed change State
if(Meetingsa$State== "Exposed") {
Urand1 <- runif(1,0,1)
if (Urand1 < parameters$S2E){
Data$State[i] <-  "Exposed"
}   
}
}
}
# Grab those who have been exposed and increment
StateE1 <- (1:nPop1)[Data$State== "Exposed"]
Data$TimeE[StateE1] = Data$TimeE[StateE1] + 1
StateE2 <- (1:nPop1)[Data$State== "Exposed" & Data$TimeE > 14]
Data$State[StateE2] <- "Recovered"    
# Grab those who could become sick
StateE3 <- (1:nPop1)[Data$State== "Exposed" & Data$TimeE > 3]
for (i in StateE3){
Urand1 <- runif(1,0,1)
# randomly assign whether they get sick or not
if ( Urand1 < parameters$E2I ) {
Data$State[i] <- "Infected"
}
}
# Update how long they have been sick
StateI1 <- (1:nPop1)[Data$State== "Infected"]
Data$TimeI[StateI1] = Data$TimeI[StateI1] + 1
# Recovered bin
StateI2 <- (1:nPop1)[Data$State== "Infected" & Data$TimeI > 14]
Data$State[StateI2] <- "R"
# Not recovered could potentially die
StateI3 <- (1:nPop1)[Data$State== "Infected" & Data$TimeI < 15]
Data$State[StateI3] <- ifelse(runif(length(StateI3), 0, 1 ) > parameters$I2D, "Infected", "Deaths")
Results$Susceptible[k] <- length(Data$State[Data$State=="Susceptible"])
Results$Exposed[k] <- length(Data$State[Data$State=="Exposed"])
Results$Infected[k] <- length(Data$State[Data$State=="Infected"])
Results$Recovered[k] <- length(Data$State[Data$State=="Recovered"])
Results$Deaths[k] <- length(Data$State[Data$State=="Deaths"])
}
return(Results)
}
Data <- Data_Generator(1000, E0=5, I0=2)
parameters <- data.frame( MaxMix = 10,
S2E = 0.25,
E2I = 0.1,
I2D = 0.1)
Model1 <- ABM_Covid(Data, parameters, runtime=25)
plot(1:25, Model1$Susceptible, type="l", col="purple", ylim = c(0,1000))
lines(1:25, Model1$Exposed, type="l", col="orange")
lines(1:25, Model1$Infected, type="l", col="red")
lines(1:25, Model1$Recovered, type="l", col="seagreen")
lines(1:25, Model1$Deaths, type="l", col="black")
end.time <- Sys.time()
time.taken <- end.time - start.time

这是优化函数的一部分(状态的更新)的一种方法。这样你就摆脱了i环和j环。

我不确定我是否理解了算法的每一个细节。但也许你可以使用这个解决方案的一些部分和想法。

update_State <- function(nmeeting, seed = 123) {
set.seed(seed)
x <- Data %>%
filter(State %in% c("Susceptible", "Exposed")) %>%
slice_sample(n = nmeeting) %>%
filter(State == "Exposed") %>%
summarise(prob = any(runif(n()) <= paramters$S2E)) %>%
pull(prob)

return(if (x) "Exposed" else "Susceptible")

}
Data %>% 
filter(State %in% c("Susceptible", "Exposed")) %>%
mutate(
n_meetings =  round(Mixing*parameters$MaxMix,0) + 1,
new_State = map(n_meetings, update_State) #this is the new State after all the Meetings have been made
)

这个想法是检查所有与给定的人见面的所有暴露的人。

运行时间为4-5秒。

大多数版本的R编译支持分析,参见?Rprof。用法类似于:

Rprof()
# <Your code without the system.time calls>
Rprof(Null)
summaryRprof()
# $by.self
#                 self.time self.pct total.time total.pct
# "["                  0.20    26.32       0.62     81.58
# "[[.data.frame"      0.16    21.05       0.24     31.58
# "[.data.frame"       0.10    13.16       0.42     55.26
# ...

基于本页的示例。

最新更新