我有一个数据框(data frame),想把它转换成便于分析的格式。数据目前的结构如下:
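| ID | 名称 | 年 | K1 | K2 | … | K50 |
|----|---------|------|------|------|---|------|
| 1 | 合同XYZ | 2000 | 交通 | 大象 | | |
| 2 | 协议ABC | 2003 | 笔 | 音乐 | | |
| 3 | 文档123 | 2003 | 大象 | | | |
| 4 | 空白 | 2004 | 音乐 | | | 交通 |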
这对你有帮助吗?
library(tidyverse)
# Example data: one row per document. K1, K2 and K50 hold string values,
# with NA where a slot is empty.
document <- data.frame(
  ID = 1:4,
  Name = c("Contract XYZ", "Agreement ABC", "Document 123", "Empty Space"),
  Year = c(2000L, 2003L, 2003L, 2004L),
  K1 = c("transport", "pens", "elephants", "music"),
  K2 = c("elephants", "music", NA, NA),
  K50 = c(NA, NA, NA, "transport"),
  stringsAsFactors = FALSE
)
# Turn the K* columns into one indicator column per distinct value:
# stack the K* values (dropping the original column names and the NA
# entries), tag every remaining row with 1, then spread the values out
# into columns. Values a document does not have come out as NA, not 0.
document %>%
  pivot_longer(
    cols = starts_with("K"),
    names_to = NULL,
    values_to = "keyword",
    values_drop_na = TRUE
  ) %>%
  mutate(has_property = 1) %>%
  pivot_wider(names_from = keyword, values_from = has_property)
#> # A tibble: 4 x 7
#> ID Name Year transport elephants pens music
#> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 Contract XYZ 2000 1 1 NA NA
#> 2 2 Agreement ABC 2003 NA NA 1 1
#> 3 3 Document 123 2003 NA 1 NA NA
#> 4 4 Empty Space 2004 1 NA NA 1
由reprex包(v2.0.1)在2021-09-21创建
我们可以使用 fastDummies 包中的 dummy_columns 函数。
下面使用 @danlooo 回答中的示例数据:
# Build 0/1 dummy columns for every distinct value found in the K<number>
# columns, keeping one row per (document, value) pair.
document %>%
  # The backslash must be doubled inside an R string literal; a bare "\d"
  # is rejected by the parser as an unrecognized escape.
  pivot_longer(matches("K\\d+"), names_to = NULL) %>%
  filter(!is.na(value)) %>%
  fastDummies::dummy_columns("value") %>%
  # dummy_columns() prefixes the new columns with "value_"; strip it so the
  # columns are named after the values themselves.
  rename_with(~ str_remove(.x, "^value_"), starts_with("value_"))
# A tibble: 7 x 8
ID Name Year value elephants music pens transport
<int> <chr> <int> <chr> <int> <int> <int> <int>
1 1 Contract XYZ 2000 transport 0 0 0 1
2 1 Contract XYZ 2000 elephants 1 0 0 0
3 2 Agreement ABC 2003 pens 0 0 1 0
4 2 Agreement ABC 2003 music 0 1 0 0
5 3 Document 123 2003 elephants 1 0 0 0
6 4 Empty Space 2004 music 0 1 0 0
7 4 Empty Space 2004 transport 0 0 0 1
# Sample input: four documents, each with string values in the K1, K2 and
# K50 columns; unused slots are missing (NA).
document <- data.frame(
  ID = seq_len(4L),
  Name = c(
    "Contract XYZ",
    "Agreement ABC",
    "Document 123",
    "Empty Space"
  ),
  Year = c(2000L, 2003L, 2003L, 2004L),
  K1 = c("transport", "pens", "elephants", "music"),
  K2 = c("elephants", "music", NA_character_, NA_character_),
  K50 = c(NA_character_, NA_character_, NA_character_, "transport"),
  stringsAsFactors = FALSE
)
> document
ID Name Year K1 K2 K50
1 1 Contract XYZ 2000 transport elephants <NA>
2 2 Agreement ABC 2003 pens music <NA>
3 3 Document 123 2003 elephants <NA> <NA>
4 4 Empty Space 2004 music <NA> transport