R中的for循环:如何应用自定义函数来遍历数据集的行和列



我有一个名为trainset的数据集,它有50个变量。对于每一行,我需要根据Systolic.Blood.Pressure、Blood.Urea.Nitrogen、Blood.Sodium、Age、Heart.Rate和COPD这几列的取值分别换算出得分,再把这些得分相加。我已经定义了一个函数来汇总这些得分:

# Asker's scalar scoring function: converts individual measurements into
# points, sums them into `total`, and returns 0 when total < 79, else 1.
#
# NOTE(review): as posted, this snippet appears to be abridged and does not
# parse - several if/else chains are cut off mid-way and the braces do not
# balance. It also reads `Age`, `pt.bun` and `pt.hr`, which are neither
# parameters nor assigned anywhere in the visible code.
# NOTE(review): `if` takes a single TRUE/FALSE condition, so this form can only
# score one row at a time, not a whole column.
m.gwtg = function(Systolic.BP, Sodium, COPD){
# Points for systolic blood pressure; first matching threshold wins.
if (Systolic.BP>=200){
pt.sbp = 0
}else if (Systolic.BP>= 190){
pt.sbp = 2
}else if (Systolic.BP>= 180){
pt.sbp = 4
}else if (Systolic.BP>= 170){
pt.sbp = 6
}else if (Systolic.BP>= 160){
pt.sbp = 8
}else if (Systolic.BP>= 150){
pt.sbp = 9
}else if (Systolic.BP>= 140){
# NOTE(review): the rest of the blood-pressure chain (and presumably a BUN
# chain setting pt.bun) seems to have been elided here - TODO confirm.
# Points for sodium.
if (Sodium>=139){
pt.sodium =  0
}else if (Sodium>= 137){
pt.sodium =  1
}else if (Sodium>= 134){
pt.sodium =  2
}
# Points for age. `Age` is not a parameter of this function.
if (Age>=110){
pt.age = 28
}else if (Age>= 100){
pt.age = 25
}else if (Age>= 90){
pt.age = 22
}else if (Age>= 80){
pt.age = 19
}else if (Age>= 70){
# NOTE(review): the rest of the age chain (and presumably a heart-rate chain
# setting pt.hr) seems to have been elided here - TODO confirm.
# Points for COPD: 2 when COPD == 1, otherwise 0.
if (COPD == 1){
pt.copd =2
} else {
pt.copd = 0
}
# Sum of all component points; pt.bun and pt.hr are never assigned above.
total = pt.sbp + pt.bun+ pt.sodium +pt.age + pt.hr+ pt.copd
# Binary outcome: 0 when total is below 79, otherwise 1.
return(if (total < 79){
outcome = 0
} else {
outcome = 1
})
}

我在编码for循环以循环通过训练集并应用为每行定义的函数时遇到了问题。我试着把这个循环编码成这样:

# Asker's attempt to score every row of `trainset` one at a time.
# NOTE(review): `1:nrow(trainset)` yields c(1, 0) when the data set has zero
# rows; `seq_len(nrow(trainset))` is the safe form.
# NOTE(review): Systolic.BP, Sodium, COPD, total and outcome.gwtg.trainset are
# assigned by index but are not created anywhere in the visible code.
for (i in 1:nrow(trainset)) {
# Copy the i-th raw measurement of each column into a working vector.
Systolic.BP[i] <- trainset$Systolic.blood.pressure[i]
Sodium[i]  <- trainset$Blood.sodium[i]
COPD[i]  <- trainset$COPD[i]
# This adds the raw measurements themselves, not the points derived from them.
total[i] <- Systolic.BP[i]+ Sodium[i] +COPD[i]

# m.gwtg is declared above with three parameters (Systolic.BP, Sodium, COPD)
# and no defaults, but only a single value is passed here.
outcome.gwtg.trainset[i]= m.gwtg(total[i])

}

我对for循环外的代码感到很困惑。谢谢你的回答!

在R中,您应该尽可能避免循环和类似的函数(例如 *apply()、purrr::map())。R的设计就是用于处理向量的,而循环要慢得多。

不执行此循环操作,而是执行以下操作:

  1. 将每列重新编码为其转换值
  2. 将转换后的值相加
  3. 将转换后的值重新编码为0/1结果

这些操作应该按向量进行,以提高效率(并避免遇到棘手的索引问题)。

例如:

#' Score every row of a data frame and derive a binary outcome.
#'
#' Each input column is recoded to points with `dplyr::case_when()`, which
#' checks its conditions top to bottom and uses the first one that matches, so
#' the thresholds must be listed from highest to lowest. The points are summed
#' into `total`, and `outcome` is 0 when `total < 79` and 1 otherwise.
#'
#' @param data A data frame with numeric columns `Systolic.BP`, `BUN`,
#'   `Sodium`, `Age`, `HR` and `COPD` (`COPD` coded as 0/1).
#' @return `data` with the added columns `pt.sbp`, `pt.bun`, `pt.sodium`,
#'   `pt.age`, `pt.hr`, `pt.copd`, `total` and `outcome`. A value that matches
#'   no band (including `NA`) gives `NA` points, which makes `total` and
#'   `outcome` `NA` for that row.
m.gwtg <- function(data) {
  dplyr::mutate(
    data,
    pt.sbp = dplyr::case_when(
      Systolic.BP >= 200 ~ 0,
      Systolic.BP >= 190 ~ 2,
      Systolic.BP >= 180 ~ 4,
      Systolic.BP >= 170 ~ 6,
      Systolic.BP >= 160 ~ 8,
      Systolic.BP >= 150 ~ 9,
      Systolic.BP >= 140 ~ 11,
      Systolic.BP >= 130 ~ 13,
      Systolic.BP >= 120 ~ 15,
      Systolic.BP >= 110 ~ 17,
      Systolic.BP >= 100 ~ 19,
      Systolic.BP >=  90 ~ 21,
      Systolic.BP >=  80 ~ 23,
      Systolic.BP >=  70 ~ 24,
      Systolic.BP >=  60 ~ 26,
      Systolic.BP >=  50 ~ 28, # should Systolic.BP < 50 be NA or 28?
      # else (currently: anything below 50, or missing, scores NA)
      TRUE ~ NA_real_
    ),
    pt.bun = dplyr::case_when(
      BUN >= 150 ~ 28,
      BUN >= 140 ~ 27,
      BUN >= 130 ~ 25,
      BUN >= 120 ~ 23,
      BUN >= 110 ~ 21,
      BUN >= 100 ~ 19,
      BUN >=  90 ~ 17,
      BUN >=  80 ~ 15,
      BUN >=  70 ~ 13,
      BUN >=  60 ~ 11,
      BUN >=  50 ~  9,
      BUN >=  40 ~  8,
      BUN >=  30 ~  6,
      BUN >=  20 ~  4,
      BUN >=  10 ~  2,
      BUN  <  10 ~  0,
      # else
      TRUE ~ NA_real_
    ),
    pt.sodium = dplyr::case_when(
      Sodium >= 139 ~ 0,
      Sodium >= 137 ~ 1,
      Sodium >= 134 ~ 2,
      Sodium >= 131 ~ 3,
      Sodium  < 131 ~ 4,
      # else
      TRUE ~ NA_real_
    ),
    # Every band must use `>=`: with `<=` the first such line would swallow
    # all younger ages and make the lower bands unreachable.
    pt.age = dplyr::case_when(
      Age >= 110 ~ 28,
      Age >= 100 ~ 25,
      Age >=  90 ~ 22,
      Age >=  80 ~ 19,
      Age >=  70 ~ 17,
      Age >=  60 ~ 14,
      Age >=  50 ~ 11,
      Age >=  40 ~  8,
      Age >=  30 ~  6,
      Age >=  20 ~  3,
      Age  <  20 ~  0,
      # else
      TRUE ~ NA_real_
    ),
    pt.hr = dplyr::case_when(
      HR >= 105 ~ 8,
      HR >= 100 ~ 6,
      HR >=  95 ~ 5,
      HR >=  90 ~ 4,
      HR >=  85 ~ 3,
      HR >=  80 ~ 1,
      HR  <  80 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    pt.copd = dplyr::case_when(
      COPD == 1 ~ 2,
      COPD == 0 ~ 0,
      # else
      TRUE ~ NA_real_
    ),
    # Later arguments of mutate() can use the columns created just above.
    total = pt.sbp + pt.bun + pt.sodium + pt.age + pt.hr + pt.copd,
    outcome = dplyr::if_else(total < 79, 0, 1)
  )
}

example_data <- data.frame(
  Systolic.BP = c(170, 160, 200),
  BUN = c(60, 150, 10),
  Sodium = c(134, 131, 139),
  Age = c(40, 80, 20),
  HR = c(90, 105, 80),
  COPD = c(1, 0, 0)
)
m.gwtg(example_data)
#>   Systolic.BP BUN Sodium Age  HR COPD pt.sbp pt.bun pt.sodium pt.age pt.hr
#> 1         170  60    134  40  90    1      6     11         2      8     4
#> 2         160 150    131  80 105    0      8     28         3     19     8
#> 3         200  10    139  20  80    0      0      2         0      3     1
#>   pt.copd total outcome
#> 1       2    33       0
#> 2       0    66       0
#> 3       0     6       0

由reprex包(v2.0.1)创建于2022-03-25

这是一种更简洁的计算结果的方法:

library(tidyverse)
#' Add up the points for blood pressure, BUN and sodium.
#'
#' Works element-wise on vectors. Only the top few bands of each table are
#' listed, so a value below the lowest listed threshold gets `NA` points and
#' the returned score for that element is `NA`.
#'
#' @param systolic_bp Numeric vector of systolic blood pressure values.
#' @param bun Numeric vector of BUN values.
#' @param sodium Numeric vector of sodium values.
#' @return Numeric vector holding the sum of the three point values.
calc_score <- function(systolic_bp, bun, sodium) {
  points_bp <- case_when(
    systolic_bp >= 200 ~ 0,
    systolic_bp >= 190 ~ 2,
    systolic_bp >= 180 ~ 4
  )

  points_bun <- case_when(
    bun >= 150 ~ 28,
    bun >= 140 ~ 27
  )

  points_sodium <- case_when(
    sodium >= 139 ~ 0,
    sodium >= 137 ~ 1
  )

  combined <- points_bp + points_bun + points_sodium
  combined
}
# example data
trainset <- tibble(
  systolic_bp = c(180, 195),
  bun = c(145, 180),
  sodium = c(138, 140)
)

# calc_score() is built on case_when(), so it already works on whole columns;
# calling it directly avoids a row-by-row pmap_dbl() loop.
# The outcome is 1 when the score reaches 79, the same cutoff as the
# `total < 79` rule stated in the question.
trainset %>%
  mutate(
    score = calc_score(systolic_bp, bun, sodium),
    outcome = as.numeric(score >= 79)
  )
#> # A tibble: 2 × 5
#>   systolic_bp   bun sodium score outcome
#>         <dbl> <dbl>  <dbl> <dbl>   <dbl>
#> 1         180   145    138    32       0
#> 2         195   180    140    30       0

由reprex包(v2.0.0)创建于2022-03-25

最新更新