合并数据集会导致列数据转换为NA



我正在尝试基于日期合并两个数据集

然而,合并函数导致";duration_ days";要转换为NA 的部分

有办法解决这个问题吗?

> dput(head(ksM))
structure(list(date = c("01/01/2021", "01/05/2021", "01/06/2021", 
"01/07/2021", "01/11/2021", "01/12/2021"), `projects launched` = c(4L, 
8L, 4L, 4L, 4L, 12L), pledged = c(50278.64, 366279.590415302, 
172073.0471292, 230.537553792, 304353.5676352, 285277.861423738
), backers = c(2880L, 6588L, 3528L, 16L, 4204L, 6632L), `total goal` = c(24000, 
148000, 60000, 4000, 140000, 198000), `mean goal` = c(6000, 18500, 
15000, 1000, 35000, 16500), US = c(4L, 4L, NA, NA, NA, 8L), `number of success` = c(4L, 
8L, 4L, 4L, 4L, 12L)), row.names = c(NA, 6L), class = "data.frame")
> dput(head(group_duration))
structure(list(date = structure(c(18628, 18632, 18633, 18634, 
18638, 18639), class = "Date"), duration_days = c(30, 31, 30, 
53, 30, 42.6666666666667)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))
test <- merge(ksM, group_duration, by.x = "date", by.y = "date", all.x= TRUE )
> dput(head(test))
structure(list(date = c("01/01/2021", "01/05/2021", "01/06/2021", 
"01/07/2021", "01/11/2021", "01/12/2021"), `projects launched` = c(4L, 
8L, 4L, 4L, 4L, 12L), pledged = c(50278.64, 366279.590415302, 
172073.0471292, 230.537553792, 304353.5676352, 285277.861423738
), backers = c(2880L, 6588L, 3528L, 16L, 4204L, 6632L), `total goal` = c(24000, 
148000, 60000, 4000, 140000, 198000), `mean goal` = c(6000, 18500, 
15000, 1000, 35000, 16500), US = c(4L, 4L, NA, NA, NA, 8L), `number of success` = c(4L, 
8L, 4L, 4L, 4L, 12L), duration_days = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_)), row.names = c(NA, 6L), class = "data.frame")

您的日期列有不同的格式。您可以通过使用具有正确格式的as.Date来改变这一点,并使用类似以下的left_join来加入它们:

ksM <- structure(list(date = c("01/01/2021", "01/05/2021", "01/06/2021", 
                               "01/07/2021", "01/11/2021", "01/12/2021"), `projects launched` = c(4L, 
                                                                                                  8L, 4L, 4L, 4L, 12L), pledged = c(50278.64, 366279.590415302, 
                                                                                                                                    172073.0471292, 230.537553792, 304353.5676352, 285277.861423738
                                                                                                  ), backers = c(2880L, 6588L, 3528L, 16L, 4204L, 6632L), `total goal` = c(24000, 
                                                                                                                                                                           148000, 60000, 4000, 140000, 198000), `mean goal` = c(6000, 18500, 
                                                                                                                                                                                                                                 15000, 1000, 35000, 16500), US = c(4L, 4L, NA, NA, NA, 8L), `number of success` = c(4L, 
                                                                                                                                                                                                                                                                                                                     8L, 4L, 4L, 4L, 12L)), row.names = c(NA, 6L), class = "data.frame")
group_duration <- structure(list(date = structure(c(18628, 18632, 18633, 18634, 
                                                    18638, 18639), class = "Date"), duration_days = c(30, 31, 30, 
                                                                                                      53, 30, 42.6666666666667)), row.names = c(NA, -6L), class = c("tbl_df", 
                                                                                                                                                                    "tbl", "data.frame"))                    
library(dplyr)
ksM %>% 
  mutate(date = as.Date(date, "%m/%d/%Y")) %>% 
  left_join(group_duration, by = "date")
#>         date projects launched     pledged backers total goal mean goal US
#> 1 2021-01-01                 4  50278.6400    2880      24000      6000  4
#> 2 2021-01-05                 8 366279.5904    6588     148000     18500  4
#> 3 2021-01-06                 4 172073.0471    3528      60000     15000 NA
#> 4 2021-01-07                 4    230.5376      16       4000      1000 NA
#> 5 2021-01-11                 4 304353.5676    4204     140000     35000 NA
#> 6 2021-01-12                12 285277.8614    6632     198000     16500  8
#>   number of success duration_days
#> 1                 4      30.00000
#> 2                 8      31.00000
#> 3                 4      30.00000
#> 4                 4      53.00000
#> 5                 4      30.00000
#> 6                12      42.66667

创建于2022-07-22由reprex包(v2.0.1(

ksM中的date列是一个字符向量。您需要将其转换为日期:

ksM$date  <- as.Date(ksM$date, format = "%m/%d/%Y")
merge(ksM, group_duration, by = "date", all.x= TRUE )
#         date projects launched     pledged backers total goal mean goal US number of success duration_days
# 1 2021-01-01                 4  50278.6400    2880      24000      6000  4                 4      30.00000
# 2 2021-01-05                 8 366279.5904    6588     148000     18500  4                 8      31.00000
# 3 2021-01-06                 4 172073.0471    3528      60000     15000 NA                 4      30.00000
# 4 2021-01-07                 4    230.5376      16       4000      1000 NA                 4      53.00000
# 5 2021-01-11                 4 304353.5676    4204     140000     35000 NA                 4      30.00000
# 6 2021-01-12                12 285277.8614    6632     198000     16500  8                12      42.66667

最新更新