我尝试用这种方式合并列:
library(dplyr)
library(stringr)
library(tidyr)
dfcheck <- data.frame(status = c("open/close", "close", "open"), stock = c("company energy","goods and books","other"), name = c("amazon1;google1","google3;yahoo1","yahoo2;amazon2;google2"))
dfdata <- data.frame(id = c("id1", "id2", "id3"), title1 = c("amazon1","google1","yahoo1"), title2 = c("yahoo2",NA,"amazon2"))
dfcheck_tidy <- dfcheck %>%
mutate(name = str_split(name, ";")) %>%
unnest(name)
dfdata %>%
left_join(dfcheck_tidy,
by = c("title1" = "name")) %>%
left_join(dfcheck_tidy,
by = c("title2" = "name"),
suffix = c("1", "2"))
多于两列:
df_in2 <- df_in %>%
left_join(df_in_tidy,
by = c("in_1" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_2" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_3" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_4" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_5" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_6" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_7" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_8" = "stock_name")) %>%
left_join(dfedu_categories_tidy,
by = c("in_9" = "stock_name"),
suffix = c("1", "2", "3", "4", "5", "6", "7", "8", "9"))
怎么可能有更多的列来避免收到这个错误:
Error in `left_join()`: ! `suffix` must be a character vector of length 2. ℹ `suffix` is a character vector of length 8. Run `rlang::last_error()` to see where the error occurred.
连接是否可以有两个以上的列?[1]:
我们可以修改原来的解决方案,这样无论需要多少股票名,都只需要一个连接。
dfcheck <- data.frame(status = c("open/close", "close", "open"), stock = c("company energy","goods and books","other"), name = c("amazon1;google1","google3;yahoo1","yahoo2;amazon2;google2"))
dfdata <- data.frame(id = c("id1", "id2", "id3"), title1 = c("amazon1","google1","yahoo1"), title2 = c("yahoo2",NA,"amazon2"))
与原始解决方案一样,重新格式化dfcheck
,使其具有一个"名称";每一行:
dfcheck_tidy <- dfcheck %>%
separate_rows(name)
status stock name
<chr> <chr> <chr>
1 open/close company energy amazon1
2 open/close company energy google1
3 close goods and books google3
4 close goods and books yahoo1
5 open other yahoo2
6 open other amazon2
7 open other google2
重新格式化dfdata
,使"title1"one_answers";title2"被替换为"title_num"one_answers";title_name"列。通过这种方式,可以有任意数量的标题,而我们仍然只有这些列。
dfdata_long <- dfdata %>%
pivot_longer(-id, names_to = 'title_num', values_to = 'title_name') %>%
mutate(title_num = parse_number(title_num))
id title_num title_name
<chr> <dbl> <chr>
1 id1 1 amazon1
2 id1 2 yahoo2
3 id2 1 google1
4 id2 2 NA
5 id3 1 yahoo1
6 id3 2 amazon2
最后,将重新格式化的数据集连接在一起,然后使用pivot_wider()
创建编号列。注意,这个操作只需要一个连接。
data_out <- dfdata_long %>%
inner_join(dfcheck_tidy, by = c(title_name = 'name')) %>%
pivot_wider(names_from = title_num, values_from = c(title_name, status, stock))
id title_name_1 title_name_2 status_1 status_2 stock_1 stock_2
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 id1 amazon1 yahoo2 open/close open company energy other
2 id2 google1 NA open/close NA company energy NA
3 id3 yahoo1 amazon2 close open goods and books other