我有一个名为trainset的数据集,它有50个变量。对于每一行,我需要将Systolic.Blood.Pressure、Blood.Real.Nitrogen、Blood.Sodium、Age、heart.rR和COPD这几列的值换算成分值后相加。我已经定义了一个函数来对这些分值求和:
# Converts one patient's measurements into points, adds the points up, and
# returns a binary outcome: 0 when the total is below 79, otherwise 1.
#
# Arguments (each compared with scalar `if`, so one value per call):
#   Systolic.BP  systolic blood pressure
#   Sodium       blood sodium
#   COPD         compared against 1 to decide whether COPD points apply
#
# NOTE(review): this listing is abridged. Two `else if` branches are left open
# and the braces do not balance, so it does not parse as posted.
m.gwtg = function(Systolic.BP, Sodium, COPD){
# Systolic blood pressure points: the higher the pressure, the fewer points.
if (Systolic.BP>=200){
pt.sbp = 0
}else if (Systolic.BP>= 190){
pt.sbp = 2
}else if (Systolic.BP>= 180){
pt.sbp = 4
}else if (Systolic.BP>= 170){
pt.sbp = 6
}else if (Systolic.BP>= 160){
pt.sbp = 8
}else if (Systolic.BP>= 150){
pt.sbp = 9
}else if (Systolic.BP>= 140){
# NOTE(review): this branch has no assignment and is never closed; the
# remaining blood-pressure bands appear to have been cut from the listing.
# Sodium points. There is no final `else`, so a value below 134 leaves
# pt.sodium unassigned in the visible code.
if (Sodium>=139){
pt.sodium = 0
}else if (Sodium>= 137){
pt.sodium = 1
}else if (Sodium>= 134){
pt.sodium = 2
}
# Age points.
# NOTE(review): Age is not a parameter of this function, so it would have to
# be found in an enclosing environment — confirm or add it to the signature.
if (Age>=110){
pt.age = 28
}else if (Age>= 100){
pt.age = 25
}else if (Age>= 90){
pt.age = 22
}else if (Age>= 80){
pt.age = 19
}else if (Age>= 70){
# NOTE(review): this branch is also left open, with no assignment; the lower
# age bands appear to have been cut from the listing.
# COPD points: 2 when COPD equals 1, otherwise 0.
if (COPD == 1){
pt.copd =2
} else {
pt.copd = 0
}
# NOTE(review): pt.bun and pt.hr are never assigned anywhere in the visible
# code, so this sum cannot be evaluated as posted.
total = pt.sbp + pt.bun+ pt.sodium +pt.age + pt.hr+ pt.copd
# The if/else is used as an expression; its value (0 or 1) is what is returned.
return(if (total < 79){
outcome = 0
} else {
outcome = 1
})
}
我在编写for循环来遍历训练集并对每一行应用上面定义的函数时遇到了问题。我尝试把循环写成这样:
# Apply m.gwtg() to every row of trainset and collect the 0/1 outcomes.

# Take the three inputs out of trainset once, as whole columns, rather than
# copying them one element at a time inside the loop.
Systolic.BP <- trainset$Systolic.blood.pressure
Sodium <- trainset$Blood.sodium
COPD <- trainset$COPD

# Preallocate the result: assigning to outcome.gwtg.trainset[i] before the
# object exists is an error.
outcome.gwtg.trainset <- numeric(nrow(trainset))

# seq_len() gives an empty sequence for a zero-row trainset, whereas
# 1:nrow(trainset) would iterate over c(1, 0).
for (i in seq_len(nrow(trainset))) {
  # m.gwtg() takes the three raw measurements as separate arguments and turns
  # each into points itself, so they must not be summed before the call.
  outcome.gwtg.trainset[i] <- m.gwtg(Systolic.BP[i], Sodium[i], COPD[i])
}
我对for循环外的代码感到很困惑。谢谢你的回答!
在R中,您应该尽可能避免循环和类似的函数(例如*apply()和purrr::map())。R被设计用于处理向量,而循环要慢得多。
不执行此循环操作,而是执行以下操作:
- 将每列重新编码为其转换值
- 将转换后的值相加
- 将转换后的值重新编码为0/1结果
这些操作应该按向量进行,以提高效率(并避免遇到棘手的索引问题)。
例如:
#' Score every row of a data frame and flag rows whose total reaches 79
#'
#' Each input column is recoded to points with `dplyr::case_when()`, the
#' points are summed into `total`, and `outcome` is 0 when `total < 79` and 1
#' otherwise. All steps operate on whole columns, so no loop is needed.
#' (Presumably the GWTG heart-failure risk score, judging by the function
#' name — confirm the point tables against the published score.)
#'
#' @param data A data frame with numeric columns `Systolic.BP`, `BUN`,
#'   `Sodium`, `Age`, `HR` and `COPD` (`COPD` coded 0/1).
#' @return `data` with the added columns `pt.sbp`, `pt.bun`, `pt.sodium`,
#'   `pt.age`, `pt.hr`, `pt.copd`, `total` and `outcome`. A missing or
#'   out-of-table input gives `NA` points, which makes `total` and `outcome`
#'   `NA` for that row.
m.gwtg <- function(data) {
  stopifnot(is.data.frame(data))

  # case_when() takes the first matching branch, so every ladder below must be
  # written from the highest band down using `>=`.
  dplyr::mutate(data,
    pt.sbp = dplyr::case_when(
      Systolic.BP >= 200 ~ 0,
      Systolic.BP >= 190 ~ 2,
      Systolic.BP >= 180 ~ 4,
      Systolic.BP >= 170 ~ 6,
      Systolic.BP >= 160 ~ 8,
      Systolic.BP >= 150 ~ 9,
      Systolic.BP >= 140 ~ 11,
      Systolic.BP >= 130 ~ 13,
      Systolic.BP >= 120 ~ 15,
      Systolic.BP >= 110 ~ 17,
      Systolic.BP >= 100 ~ 19,
      Systolic.BP >= 90 ~ 21,
      Systolic.BP >= 80 ~ 23,
      Systolic.BP >= 70 ~ 24,
      Systolic.BP >= 60 ~ 26,
      Systolic.BP >= 50 ~ 28, # should Systolic.BP < 50 be NA or 28?
      # else (below 50, or missing)
      TRUE ~ NA_real_
    ),
    pt.bun = dplyr::case_when(
      BUN >= 150 ~ 28,
      BUN >= 140 ~ 27,
      BUN >= 130 ~ 25,
      BUN >= 120 ~ 23,
      BUN >= 110 ~ 21,
      BUN >= 100 ~ 19,
      BUN >= 90 ~ 17,
      BUN >= 80 ~ 15,
      BUN >= 70 ~ 13,
      BUN >= 60 ~ 11,
      BUN >= 50 ~ 9,
      BUN >= 40 ~ 8,
      BUN >= 30 ~ 6,
      BUN >= 20 ~ 4,
      BUN >= 10 ~ 2,
      BUN < 10 ~ 0,
      # else (missing)
      TRUE ~ NA_real_
    ),
    pt.sodium = dplyr::case_when(
      Sodium >= 139 ~ 0,
      Sodium >= 137 ~ 1,
      Sodium >= 134 ~ 2,
      Sodium >= 131 ~ 3,
      Sodium < 131 ~ 4,
      # else (missing)
      TRUE ~ NA_real_
    ),
    pt.age = dplyr::case_when(
      Age >= 110 ~ 28,
      Age >= 100 ~ 25,
      Age >= 90 ~ 22,
      Age >= 80 ~ 19,
      # The 70 and 40 bands were previously written with `<=`, which made
      # every age of 70 or below score 17 and left the bands under it
      # unreachable.
      Age >= 70 ~ 17,
      Age >= 60 ~ 14,
      Age >= 50 ~ 11,
      Age >= 40 ~ 8,
      Age >= 30 ~ 6,
      Age >= 20 ~ 3,
      Age < 20 ~ 0,
      # else (missing)
      TRUE ~ NA_real_
    ),
    pt.hr = dplyr::case_when(
      HR >= 105 ~ 8,
      HR >= 100 ~ 6,
      HR >= 95 ~ 5,
      HR >= 90 ~ 4,
      HR >= 85 ~ 3,
      HR >= 80 ~ 1,
      HR < 80 ~ 0,
      # else (missing)
      TRUE ~ NA_real_
    ),
    pt.copd = dplyr::case_when(
      COPD == 1 ~ 2,
      COPD == 0 ~ 0,
      # else (missing, or not coded 0/1)
      TRUE ~ NA_real_
    ),
    total = pt.sbp + pt.bun + pt.sodium + pt.age + pt.hr + pt.copd,
    outcome = dplyr::if_else(total < 79, 0, 1)
  )
}

# Three example patients covering high, middle and low bands of each table.
example_data <- data.frame(
  Systolic.BP = c(170, 160, 200),
  BUN = c(60, 150, 10),
  Sodium = c(134, 131, 139),
  Age = c(40, 80, 20),
  HR = c(90, 105, 80),
  COPD = c(1, 0, 0)
)
m.gwtg(example_data)
#>   Systolic.BP BUN Sodium Age  HR COPD pt.sbp pt.bun pt.sodium pt.age pt.hr
#> 1         170  60    134  40  90    1      6     11         2      8     4
#> 2         160 150    131  80 105    0      8     28         3     19     8
#> 3         200  10    139  20  80    0      0      2         0      3     1
#>   pt.copd total outcome
#> 1       2    33       0
#> 2       0    66       0
#> 3       0     6       0
由reprex包(v2.0.1)创建于2022-03-25
这是一种更简洁的计算结果的方法:
library(tidyverse)
# Total points for systolic blood pressure, BUN and sodium.
#
# All three arguments are numeric vectors and are scored element-wise. Only the
# top bands of each table are listed here: a value below the lowest listed
# cut-off matches no case_when() branch, so its points, and therefore the sum,
# are NA.
calc_score <- function(systolic_bp, bun, sodium) {
  # systolic blood pressure points
  case_when(
    systolic_bp >= 200 ~ 0,
    systolic_bp >= 190 ~ 2,
    systolic_bp >= 180 ~ 4
  ) +
    # blood urea nitrogen points
    case_when(
      bun >= 150 ~ 28,
      bun >= 140 ~ 27
    ) +
    # sodium points
    case_when(
      sodium >= 139 ~ 0,
      sodium >= 137 ~ 1
    )
}
# example data
trainset <- tibble(
  systolic_bp = c(180, 195),
  bun = c(145, 180),
  sodium = c(138, 140)
)
trainset %>%
  mutate(
    # calc_score() is built from case_when() and `+`, both of which work on
    # whole columns, so it can be called directly without a row-wise pmap_dbl().
    score = calc_score(systolic_bp, bun, sodium),
    # Same rule as the original function: outcome is 0 when the total is below
    # 79 and 1 otherwise.
    outcome = as.numeric(score >= 79)
  )
#> # A tibble: 2 × 5
#> systolic_bp bun sodium score outcome
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 180 145 138 32 0
#> 2 195 180 140 30 0
由reprex包(v2.0.0)创建于2022-03-25