我正在重做一项研究作业,看看我是否可以改进它并重新投入其中;状态";以及";结果";返回给定结果/疾病死亡率最低的州的医院名称。由于某种原因,我的filter(!is.na(((行似乎不起作用。我有一种感觉,这与我使用粘贴来选择列名有关,但在我的炖菜中,这似乎无关紧要。
这是代码:
library(dplyr)

# Read every column as text; the three rate columns are made numeric below.
data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

# Source column name -> short name used by the rest of the script.
# (The "Hostpital" spelling is kept because later code refers to it.)
shortNames <- c(
  "Hospital.Name" = "HostpitalName",
  "State" = "State",
  "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack" = "DeathRateHeartAttack",
  "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure" = "DeathRateHeartFailure",
  "Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia" = "DeathRatePneumonia"
)

# Keep only the five columns of interest, then apply the short names.
dataSelected <- data[, names(shortNames)]
colnames(dataSelected) <- unname(shortNames)

# Columns 3 to 5 hold the death rates; entries that are not numbers become NA.
dataSelected[3:5] <- lapply(dataSelected[3:5], as.numeric)
# Return the name of the hospital in `state` with the lowest death rate for
# `outcome`, looked up in the global data frame `dataSelected`.
#
# state:   value to look for in dataSelected$State, e.g. "AL".
# outcome: suffix of a DeathRate* column of dataSelected, e.g. "HeartAttack".
#
# Returns the hospital name as a character string, or "Invalid state" /
# "Invalid outcome" when the corresponding argument is not found. Ties on the
# rate are broken alphabetically by hospital name. The result is NA when no
# hospital in the state has a non-missing rate.
best <- function(state, outcome) {
  column <- paste0("DeathRate", outcome)

  if (!(state %in% dataSelected$State)) {
    return("Invalid state")
  }
  if (!(column %in% colnames(dataSelected))) {
    return("Invalid outcome")
  }

  # `column` holds a column *name* as a string. Inside filter()/arrange() a
  # bare `column` is just that string: is.na("DeathRate...") is always FALSE
  # and sorting by it sorts by a constant. `.data[[column]]` looks the string
  # up as a column of the data frame, and all_of() does the same for select().
  BestHospitals <- dataSelected %>%
    select(HostpitalName, State, all_of(column)) %>%
    filter(!is.na(.data[[column]])) %>%
    filter(State == state) %>%
    arrange(.data[[column]], HostpitalName)

  BestHospitals[1, 1]
}
我的函数调用
# Example call: state "AL", outcome suffix "HeartAttack"
# (i.e. the DeathRateHeartAttack column).
best("AL","HeartAttack")
版本信息
平台x86_64-apple-darwin15.6.0
arch x86_64
os darwin15.6.0
系统x86_64,darwin15.6.0
状态
主要3
次要6.1
2019年
月07日
第05天
svn修订版76782
语言R
版本。字符串R版本3.6.1(2019-07-05(昵称脚趾动作
dput的输出(磁头(dataSelected((:
structure(list(HostpitalName = c("SOUTHEAST ALABAMA MEDICAL CENTER",
"MARSHALL MEDICAL CENTER SOUTH", "ELIZA COFFEE MEMORIAL HOSPITAL",
"MIZELL MEMORIAL HOSPITAL", "CRENSHAW COMMUNITY HOSPITAL", "MARSHALL MEDICAL CENTER NORTH"
), State = c("AL", "AL", "AL", "AL", "AL", "AL"), DeathRateHeartAttack = c(14.3,
18.5, 18.1, NA, NA, NA), DeathRateHeartFailure = c(11.4, 15.2,
11.3, 13.6, 13.8, 12.5), DeathRatePneumonia = c(10.9, 13.9, 13.4,
14.9, 15.8, 8.7)), row.names = c(NA, 6L), class = "data.frame")
按列号而不是名称进行筛选怎么样?
# Variant of best() that refers to the outcome column by position instead of
# by name: after select() the outcome is always the third column.
#
# state:   value to look for in dataSelected$State, e.g. "AL".
# outcome: suffix of a DeathRate* column of dataSelected, e.g. "HeartAttack".
#
# Returns the hospital name as a character string, or "Invalid state" /
# "Invalid outcome" when the corresponding argument is not found. The result
# is NA when no hospital in the state has a non-missing rate.
best <- function(state, outcome) {
  column <- paste0("DeathRate", outcome)

  if (!(state %in% dataSelected$State)) {
    return("Invalid state")
  }
  if (!(column %in% colnames(dataSelected))) {
    return("Invalid outcome")
  }

  # In a magrittr pipe `.` is the data frame coming into the current step, so
  # `.[[3]]` is the outcome column as a plain vector. Sorting is ascending
  # because the lowest death rate is wanted; the name breaks ties.
  BestHospitals <- dataSelected %>%
    select(HostpitalName, State, all_of(column)) %>%
    filter(!is.na(.[[3]])) %>%
    filter(State == state) %>%
    arrange(.[[3]], HostpitalName)

  BestHospitals[1, 1]
}
我随意重写了您的函数。
# It is usually a bad idea to rely on a global variable (like your data frame)
# inside a function, so the data frame is passed in as `dat`.
#
# Return the row of the hospital with the lowest death rate for one outcome
# in one state.
#
# dat:     data frame with columns HostpitalName, State and
#          DeathRate<outcome>.
# state:   value to match against dat$State, e.g. "AL".
# outcome: suffix of the DeathRate* column to rank by, e.g. "HeartAttack".
#
# Stops with 'Invalid input' when an argument is missing/NULL, the state is
# not present in dat$State, or the outcome column does not exist.
# Returns a one-row data frame (HostpitalName, State, rate column); it has
# zero rows when no hospital in the state has complete data. Ties on the rate
# are broken alphabetically by hospital name.
best <- function(dat = NULL, state = NULL, outcome = NULL) {
  # NULL arguments would otherwise reach `if` as a zero-length condition.
  if (is.null(dat) || is.null(state) || is.null(outcome)) {
    stop('Invalid input')
  }

  column <- paste0("DeathRate", outcome)

  # `||` is the scalar, short-circuiting "or" that `if` expects.
  if (!(state %in% dat$State) || !(column %in% colnames(dat))) {
    stop('Invalid input')
  }

  dat %>%
    select(HostpitalName, State, all_of(column)) %>%
    na.omit() %>% # drops every row with an NA in any selected column
    filter(State == state) %>%
    # `column` is a string; `.data[[column]]` sorts by the column it names
    # (a bare `column` would sort by the constant string instead).
    arrange(.data[[column]], HostpitalName) %>%
    slice(1) # dplyr way to choose the first row
}

best(dat = dataSelected, state = "AL", outcome = "HeartAttack")
# With the six-row sample shown in the dput() output above, this prints:
#>                      HostpitalName State DeathRateHeartAttack
#> 1 SOUTHEAST ALABAMA MEDICAL CENTER    AL                 14.3
这里有一个类似tidyverse的函数版本。
正如D.J.所评论的,在函数中包含对全局对象(dataSelected
(的引用不是一种好的做法。将其作为参数传递要好得多。这还有一个有益的副作用,即允许您在管道中使用该函数。
此外,HostpitalName
可能是一个拼写错误。你是说HospitalName
吗?由于你不止一次使用这个奇怪的拼写,我保留了下来
虽然我理解为什么允许用户通过将"DeathRate"
的公共前缀粘贴到outcome
中传递的值来缩短所需结果的名称,但这可能不是最佳实践,因为需要用户了解此约定,并将函数的使用限制为遵循此约定的数据帧和列。它也不太适合tidyverse语法。
# Find the hospital with the lowest value of an outcome column in one state.
#
# d:       data frame with HostpitalName and State columns. It comes first so
#          the function can be used at the end of a pipe, and on data frames
#          other than dataSelected.
# state:   value to match against the State column.
# outcome: unquoted name of the column in d to rank by.
#
# Stops with 'Invalid state' or 'Invalid outcome' when the state or the
# column is not found; stop() is used rather than returning a message so
# that processing ends immediately.
# Returns the HostpitalName (character) of the row with the smallest
# non-missing outcome, ties broken by name; the result is empty when no row
# qualifies.
best <- function(d, state, outcome) {
  # Capture the unquoted column name so it can be inspected and re-injected.
  outcome_quo <- enquo(outcome)

  known_states <- d %>% pull(State) %>% unique()
  if (!(state %in% known_states)) {
    stop('Invalid state')
  }

  outcome_name <- as_label(outcome_quo)
  if (!(outcome_name %in% colnames(d))) {
    stop('Invalid outcome')
  }

  # HostpitalName should perhaps be HospitalName in the source data frame.
  ranked <- d %>%
    select(HostpitalName, State, !!outcome_quo) %>%
    filter(State == state, !is.na(!!outcome_quo)) %>%
    arrange(!!outcome_quo, HostpitalName)

  head(pull(ranked, HostpitalName), 1)
}
这样我们就可以写了
# Piped call: dataSelected is passed as the first argument `d`, and the
# outcome column is given unquoted.
dataSelected %>% best("AL", DeathRateHeartFailure)
[1] "ELIZA COFFEE MEMORIAL HOSPITAL"
或者说
# Same piped call, ranking by the DeathRatePneumonia column instead.
dataSelected %>% best("AL", DeathRatePneumonia)
[1] "MARSHALL MEDICAL CENTER NORTH"