我希望这个问题不要被认为太宽泛。然而,我不知道如何以不同的方式问它:在他的youtube视频中,Ed Boone用r介绍了ABM,他编写了以下代码(只有一些变量名称被更改)。代码在1000个观察值下运行良好,但是当我增加观察值时,它变得非常慢。因此,我想提高函数的速度。我可以在每行之后手动添加start.time_x <- Sys.time()
等(看看需要花费很多时间),但我想知道是否有更好的方法来识别瓶颈,因为所有嵌套的for循环使得甚至相当复杂(因为我还必须考虑每个代码运行多少次):
# ABM_Covid - II
start.time <- Sys.time()
Data_Generator <- function(nPop1, E0, I0) {
# Create a population of susceptibles
Data <- data.frame( AgentNo=1:nPop1,
State="Susceptible",
Mixing= runif(nPop1,0,1),
TimeE = 0,
TimeI = 0,
stringsAsFactors = FALSE)
Data$State[1:E0] <- "Exposed" # This just says that the first person is exposed, since the mixing is random anyway, this is not an issue (because the mixing of Exposed is random)
Data$Time[1:E0] <- rbinom(E0, 13, 0.5) + 1 # Exposure up to 14 days
Data$State[(E0+1):(E0+I0)] <- "Infected"
Data$Time[(E0+1):(E0+I0)] <- rbinom(I0, 12, 0.5)
return(Data)
}
ABM_Covid <- function(Data, parameters, runtime){
nPop1 <- nrow(Data)
# runtime <- 15
Results <- data.frame( Susceptible = rep (0, runtime),
Exposed = rep (0, runtime),
Infected = rep (0, runtime),
Recovered = rep (0, runtime),
Deaths = rep (0, runtime))
# Move people through time
for (k in 1:runtime){
# Moving people through time
StateSusceptible <- (1:nPop1)[Data$State == "Susceptible"]
StateSusceptible_or_Exposed <- (1:nPop1)[Data$State == "Susceptible" | Data$State == "Exposed"]
for (i in StateSusceptible) {
# Determine if they like to meet others
Mix1 <- Data$Mixing[i]
# How many agents will they meet? The plus one meets everybody meets somebody
Meetings <- round(Mix1*parameters$MaxMix,0) + 1
# Grab the agents they will meet
People_met <- sample(StateSusceptible_or_Exposed, Meetings, replace=TRUE, prob = Data$Mixing[StateSusceptible_or_Exposed])
for (j in 1:length(People_met)) {
# Grab who they will meet
Meetingsa <- Data[People_met[j], ]
# If exposed change State
if(Meetingsa$State== "Exposed") {
Urand1 <- runif(1,0,1)
if (Urand1 < parameters$S2E){
Data$State[i] <- "Exposed"
}
}
}
}
# Grab those who have been exposed and increment
StateE1 <- (1:nPop1)[Data$State== "Exposed"]
Data$TimeE[StateE1] = Data$TimeE[StateE1] + 1
StateE2 <- (1:nPop1)[Data$State== "Exposed" & Data$TimeE > 14]
Data$State[StateE2] <- "Recovered"
# Grab those who could become sick
StateE3 <- (1:nPop1)[Data$State== "Exposed" & Data$TimeE > 3]
for (i in StateE3){
Urand1 <- runif(1,0,1)
# randomly assign whether they get sick or not
if ( Urand1 < parameters$E2I ) {
Data$State[i] <- "Infected"
}
}
# Update how long they have been sick
StateI1 <- (1:nPop1)[Data$State== "Infected"]
Data$TimeI[StateI1] = Data$TimeI[StateI1] + 1
# Recovered bin
StateI2 <- (1:nPop1)[Data$State== "Infected" & Data$TimeI > 14]
Data$State[StateI2] <- "R"
# Not recovered could potentially die
StateI3 <- (1:nPop1)[Data$State== "Infected" & Data$TimeI < 15]
Data$State[StateI3] <- ifelse(runif(length(StateI3), 0, 1 ) > parameters$I2D, "Infected", "Deaths")
Results$Susceptible[k] <- length(Data$State[Data$State=="Susceptible"])
Results$Exposed[k] <- length(Data$State[Data$State=="Exposed"])
Results$Infected[k] <- length(Data$State[Data$State=="Infected"])
Results$Recovered[k] <- length(Data$State[Data$State=="Recovered"])
Results$Deaths[k] <- length(Data$State[Data$State=="Deaths"])
}
return(Results)
}
Data <- Data_Generator(1000, E0=5, I0=2)
parameters <- data.frame( MaxMix = 10,
S2E = 0.25,
E2I = 0.1,
I2D = 0.1)
Model1 <- ABM_Covid(Data, parameters, runtime=25)
plot(1:25, Model1$Susceptible, type="l", col="purple", ylim = c(0,1000))
lines(1:25, Model1$Exposed, type="l", col="orange")
lines(1:25, Model1$Infected, type="l", col="red")
lines(1:25, Model1$Recovered, type="l", col="seagreen")
lines(1:25, Model1$Deaths, type="l", col="black")
end.time <- Sys.time()
time.taken <- end.time - start.time
这是优化函数的一部分(状态的更新)的一种方法。这样你就摆脱了i
环和j
环。
我不确定我是否理解了算法的每一个细节。但也许你可以使用这个解决方案的一些部分和想法。
update_State <- function(nmeeting, seed = 123) {
set.seed(seed)
x <- Data %>%
filter(State %in% c("Susceptible", "Exposed")) %>%
slice_sample(n = nmeeting) %>%
filter(State == "Exposed") %>%
summarise(prob = any(runif(n()) <= paramters$S2E)) %>%
pull(prob)
return(if (x) "Exposed" else "Susceptible")
}
Data %>%
filter(State %in% c("Susceptible", "Exposed")) %>%
mutate(
n_meetings = round(Mixing*parameters$MaxMix,0) + 1,
new_State = map(n_meetings, update_State) #this is the new State after all the Meetings have been made
)
这个想法是检查所有与给定的人见面的所有暴露的人。
运行时间为4-5秒。
大多数版本的R编译支持分析,参见?Rprof
。用法类似于:
Rprof()
# <Your code without the system.time calls>
Rprof(Null)
summaryRprof()
# $by.self
# self.time self.pct total.time total.pct
# "[" 0.20 26.32 0.62 81.58
# "[[.data.frame" 0.16 21.05 0.24 31.58
# "[.data.frame" 0.10 13.16 0.42 55.26
# ...
基于本页的示例。