r-将ICD10代码从一行扩展到多行



我有一个这样的数据集:

ICD_10 诊断
A00 霍乱
A01-A03 其他肠道传染病
A15 呼吸系统结核
A17-A19 其他结核病

使用专用icd包:

# Example data: each row is one ICD-10 entry, either a single code or a
# dash-separated range, together with its diagnosis label.
d <- data.frame(
  ICD_10 = c("A00", "A01-A03", "A15", "A17-A19"),
  diagnosis = c(
    "Cholera",
    "Other Intestinal infectious diseases",
    "Respiratory tuberculosis",
    "Other tuberculosis"
  ),
  stringsAsFactors = FALSE
)
#remotes::install_github("jackwasey/icd")
library(icd)

为了避免在范围内生成不存在的代码或遗漏已有的代码,我们使用expand_range。例如,下面的调用返回33个代码;如果只是按顺序填充A01、A02、A03,就只会得到3个代码,那样是错误的。

# Expand the range A01..A03; the transcript below lists every code returned.
icd::expand_range("A01", "A03")
#  [1] "A01"   "A010"  "A0100" "A0101" "A0102" "A0103" "A0104" "A0105"
#  [9] "A0109" "A011"  "A012"  "A013"  "A014"  "A02"   "A020"  "A021" 
# [17] "A022"  "A0220" "A0221" "A0222" "A0223" "A0224" "A0225" "A0229"
# [25] "A028"  "A029"  "A03"   "A030"  "A031"  "A032"  "A033"  "A038" 
# [33] "A039"

我们还使用explain_code来描述新创建的代码,例如用法:

# Look up the textual description of a single code.
icd::explain_code("A01")
# [1] "Typhoid and paratyphoid fevers"

现在,将两个函数合并为一个函数,以获得漂亮的输出

# Expand one ICD-10 entry into a data frame of individual codes.
#
# icd10:     a single code ("A00") or a dash-separated range ("A01-A03").
# diagnosis: label copied onto every output row.
#
# A range is expanded with expand_range(); a single code is used as is.
# Returns a data.frame with columns icd10, diagnosis, icd10range and desc,
# where desc is whatever explain_code() returns for the expanded codes.
f <- function(icd10, diagnosis) {
  bounds <- strsplit(icd10, "-")
  bounds <- unlist(bounds)

  codes <- if (length(bounds) != 1) {
    expand_range(bounds[1], bounds[2])
  } else {
    bounds
  }

  descriptions <- explain_code(codes)

  data.frame(
    icd10 = icd10,
    diagnosis = diagnosis,
    icd10range = codes,
    desc = descriptions
  )
}

然后逐行调用该函数进行扩展,再用rbind按行合并:

# Call f() once per row of d and stack the resulting data frames row-wise.
res <- do.call(
  rbind,
  lapply(
    seq_len(nrow(d)),
    function(i) f(d$ICD_10[[i]], d$diagnosis[[i]])
  )
)
head(res)
#     icd10                            diagnosis icd10range                                 desc
# 1     A00                              Cholera        A00                              Cholera
# 2 A01-A03 Other Intestinal infectious diseases        A01       Typhoid and paratyphoid fevers
# 3 A01-A03 Other Intestinal infectious diseases       A010                        Typhoid fever
# 4 A01-A03 Other Intestinal infectious diseases      A0100           Typhoid fever, unspecified
# 5 A01-A03 Other Intestinal infectious diseases      A0101                   Typhoid meningitis
# 6 A01-A03 Other Intestinal infectious diseases      A0102 Typhoid fever with heart involvement

正如预期的那样,A01-A03现在扩展到33行:

# Count how many expanded rows each original ICD-10 entry produced.
table(res[["icd10"]])
# A00 A01-A03     A15 A17-A19 
#   1      33       1      53 
# Expand ICD-10 entries such as "A00" or "A01-A03" into individual
# three-character codes.
#
# vec: character vector; each element is one code or a "start-end" range.
#
# Returns a list with one character vector of codes per element of `vec`,
# suitable for use as a list-column.
#
# The letter is taken from the first character of each element only, so both
# ends of a range are assumed to share it.
fun <- function(vec) {
  ltr <- substring(vec, 1, 1)
  # Strip everything except digits and the dash, then split on the dash to
  # obtain the integer bounds of each entry.
  L <- lapply(strsplit(gsub("[^-0-9]", "", vec), "-"), as.integer)
  # Map() never simplifies, so the result is always a list. mapply()'s default
  # simplification returned a vector or matrix whenever every entry expanded
  # to the same number of codes (e.g. two ranges of equal width).
  Map(
    function(ltr, z) {
      sprintf("%s%02i", ltr, if (length(z) > 1) seq(z[1], z[2]) else z)
    },
    ltr, L
  )
}
# Replace each ICD_10 entry with its expanded codes, then unnest so that
# every code gets its own row.
quux |>
  dplyr::mutate(ICD_10 = fun(ICD_10)) |>
  tidyr::unnest(cols = ICD_10)
# # A tibble: 8 x 2
#   ICD_10 diagnosis                           
#   <chr>  <chr>                               
# 1 A00    Cholera                             
# 2 A01    Other Intestinal infectious diseases
# 3 A02    Other Intestinal infectious diseases
# 4 A03    Other Intestinal infectious diseases
# 5 A15    Respiratory tuberculosis            
# 6 A17    Other tuberculosis                  
# 7 A18    Other tuberculosis                  
# 8 A19    Other tuberculosis                  

数据

# Example data: each row is one ICD-10 entry, either a single code or a
# dash-separated range, together with its diagnosis label.
quux <- data.frame(
  ICD_10 = c("A00", "A01-A03", "A15", "A17-A19"),
  diagnosis = c(
    "Cholera",
    "Other Intestinal infectious diseases",
    "Respiratory tuberculosis",
    "Other tuberculosis"
  ),
  stringsAsFactors = FALSE
)

一个选项:

# Expand dash-separated ICD-10 ranges into one row per three-character code.
#
# Steps: split each range into its two end points, extract the leading letter
# and the numeric part, fill in the missing numbers per diagnosis, then
# rebuild the code from letter + zero-padded number.
tibble::tribble(
  ~ICD_10, ~diagnosis,
  "A00", "Cholera",
  "A01-A03", "Other Intestinal infectious diseases",
  "A15", "Respiratory tuberculosis",
  "A17-A19", "Other tuberculosis"
) |>
  tidyr::separate_rows(ICD_10, sep = "-") |>
  dplyr::mutate(
    # Keep the letter from the data instead of hard-coding "A".
    prefix = substr(ICD_10, 1L, 1L),
    id = readr::parse_number(ICD_10)
  ) |>
  # Grouping by prefix as well carries the letter onto the rows that
  # complete() adds.
  dplyr::group_by(diagnosis, prefix) |>
  tidyr::complete(id = min(id):max(id)) |>
  dplyr::ungroup() |>
  # Zero-pad to two digits so that 0 becomes "A00" rather than "A0".
  dplyr::mutate(ICD_10 = sprintf("%s%02d", prefix, as.integer(id))) |>
  dplyr::select(-prefix)

最新更新