我想更改此数据集:
id PTMIINDT PTMIINTM DGOTDIAG DGOTDGGB
1: ys00000001 20160101 614 R060 1
2: ys00000002 20160101 640 S0090 1
3: ys00000002 20160101 640 A090 2
4: ys00000003 20160101 959 R42 1
5: ys00000007 20160101 1111 S0600 1
6: ys00000008 20160101 1253 R558 1
转换成这个数据集:
id PTMIINDT PTMIINTM DGOTDIAG01 DGOTDGGB01 DGOTDIAG02 DGOTDGGB02
1 ys00000001 20160101 614 R060 1 NA NA
2 ys00000002 20160101 640 S0090 1 A090 2
. . . .
. . . .
. . . .
就像这样。
我尝试用 mutate 函数来得到这个数据集,但没有成功。应该如何把数据集转换成上面的形式?
# Asker's attempt (the same statement appears twice in the post).
# mutate() only adds columns: DGOTDIAG is copied into both DGOTDIAG01 and
# DGOTDIAG02, and DGOTDGGB into both DGOTDGGB01 and DGOTDGGB02, while every
# input row is kept, so rows of the same group are not merged into one.
ba<-n6 %>% group_by(id,PTMIINDT,PTMIINTM) %>%
mutate(DGOTDIAG01=DGOTDIAG, DGOTDIAG02=DGOTDIAG, DGOTDGGB01=DGOTDGGB,DGOTDGGB02=DGOTDGGB)
ba<-n6 %>% group_by(id,PTMIINDT,PTMIINTM) %>%
mutate(DGOTDIAG01=DGOTDIAG, DGOTDIAG02=DGOTDIAG, DGOTDGGB01=DGOTDGGB,DGOTDGGB02=DGOTDGGB)
id PTMIINDT PTMIINTM DGOTDIAG01 DGOTDGGB01 DGOTDIAG02 DGOTDGGB02
1 ys00000001 20160101 614 R060 1 NA NA
2 ys00000002 20160101 640 S0090 1 A090 2
. . . .
. . . .
. . . .
使用 data.table::dcast(),只需下面一行代码即可完成。
library(data.table)
示例数据
# Build the example table from inline, whitespace-separated text.
# The first line of the string is the header row; the remaining six lines are
# the sample records shown in the question.
dt <- data.table::fread("id PTMIINDT PTMIINTM DGOTDIAG DGOTDGGB
ys00000001 20160101 614 R060 1
ys00000002 20160101 640 S0090 1
ys00000002 20160101 640 A090 2
ys00000003 20160101 959 R42 1
ys00000007 20160101 1111 S0600 1
ys00000008 20160101 1253 R558 1")
代码
# Cast dt to wide format: one row per id / PTMIINDT / PTMIINTM combination and
# one set of DGOTDIAG / DGOTDGGB value columns per distinct DGOTDGGB value.
data.table::dcast(
  dt,
  id + PTMIINDT + PTMIINTM ~ DGOTDGGB,
  value.var = c("DGOTDIAG", "DGOTDGGB")
)
输出
# id PTMIINDT PTMIINTM DGOTDIAG_1 DGOTDIAG_2 DGOTDGGB.1_1 DGOTDGGB.1_2
# 1: ys00000001 20160101 614 R060 <NA> 1 NA
# 2: ys00000002 20160101 640 S0090 A090 1 2
# 3: ys00000003 20160101 959 R42 <NA> 1 NA
# 4: ys00000007 20160101 1111 S0600 <NA> 1 NA
# 5: ys00000008 20160101 1253 R558 <NA> 1 NA
tidyr 的开发版本新增了一个动词 pivot_wider,它更适合这项任务。
https://tidyr.tidyverse.org/dev/articles/pivot.html
在此之前,您可以依次使用 gather、mutate 和 spread:
# Reshape n6 to one row per (id, PTMIINDT, PTMIINTM) with numbered
# DGOTDIAGnn / DGOTDGGBnn columns, using gather() and spread().
n6 %>%
  # Stack DGOTDIAG and DGOTDGGB into (column, value) pairs.
  gather(column, value, -c(id, PTMIINDT, PTMIINTM)) %>%
  # Number the rows separately for each original column. Grouping by the
  # three key columns alone counts the DGOTDIAG and DGOTDGGB rows together,
  # which produces suffixes such as DGOTDGGB03 / DGOTDGGB04.
  group_by(id, PTMIINDT, PTMIINTM, column) %>%
  mutate(seq_no = stringr::str_pad(row_number(), width = 2, pad = "0")) %>%
  # A grouping column cannot be modified inside mutate(), so ungroup first.
  ungroup() %>%
  mutate(column = paste0(column, seq_no)) %>%
  select(-seq_no) %>%
  spread(column, value)
# Expected result for the sample data:
# # A tibble: 5 x 7
#   id         PTMIINDT PTMIINTM DGOTDGGB01 DGOTDGGB02 DGOTDIAG01 DGOTDIAG02
#   <chr>         <int>    <int> <chr>      <chr>      <chr>      <chr>
# 1 ys00000001 20160101      614 1          NA         R060       NA
# 2 ys00000002 20160101      640 1          2          S0090      A090
# 3 ys00000003 20160101      959 1          NA         R42        NA
# 4 ys00000007 20160101     1111 1          NA         S0600      NA
# 5 ys00000008 20160101     1253 1          NA         R558       NA
使用 tidyr 的最新版本(1.0.0,已发布到 CRAN):
library(tidyr)
library(dplyr)
# Reshape n6 to one row per (id, PTMIINDT, PTMIINTM) with pivot_wider().
n6 %>%
  # Number the records within each id / PTMIINDT / PTMIINTM combination rather
  # than within each id only, so an id that occurs with several
  # PTMIINDT / PTMIINTM values restarts at 1 for each combination.
  group_by(id, PTMIINDT, PTMIINTM) %>%
  dplyr::mutate(sbs = row_number()) %>%
  # Drop the grouping so the reshaped result is a plain, ungrouped tibble.
  ungroup() %>%
  pivot_wider(names_from = sbs, values_from = c(DGOTDIAG, DGOTDGGB))
# Expected result for the sample data:
# # A tibble: 5 x 7
#   id         PTMIINDT PTMIINTM DGOTDIAG_1 DGOTDIAG_2 DGOTDGGB_1 DGOTDGGB_2
#   <fct>         <dbl>    <dbl> <fct>      <fct>           <dbl>      <dbl>
# 1 ys00000001 20160101      614 R060       NA                  1         NA
# 2 ys00000002 20160101      640 S0090      A090                1          2
# 3 ys00000003 20160101      959 R42        NA                  1         NA
# 4 ys00000007 20160101     1111 S0600      NA                  1         NA
# 5 ys00000008 20160101     1253 R558       NA                  1         NA