r-Dplyr突变循环



有没有更好的方法可以在r中使用dplyr实现这一点,而不必为每个变量键入新的公式?

code    dagala_price_1  dagala_price_2  dagala_price_3  dagala_price_4  dagala_price_5  dagala_unit_nb_1    dagala_unit_nb_2    dagala_unit_nb_3    dagala_unit_nb_4    dagala_unit_nb_5
MI-NAL-KA   50  15000   NA  NA  NA  100 1   NA  NA  NA
M-KK-KZ 10000   20000   NA  NA  NA  20  2   NA  NA  NA
M-KK-NK 10000   NA  NA  NA  NA  5   NA  NA  NA  NA
MI-NA-BA    12000   15000   NA  NA  NA  2   1   NA  NA  NA
MI-BD-BT    12000   15000   NA  NA  NA  3   1   NA  NA  NA
MI-MI-ND    12000   80000   NA  NA  NA  8   1   NA  NA  NA
MI-NAL-LT   13000   15000   NA  18000   NA  1   3   NA  1   NA
M-BY-BGY    13000   15000   NA  NA  NA  4   1   NA  NA  NA
MI-NA-NY    13000   NA  NA  NA  NA  2   NA  NA  NA  NA
MI-KAN-BL   18000   35000   15000   NA  NA  1   1   6   NA  NA
MI-KIGO-KR  20000   15000   15000   NA  NA  10  8   4   NA  NA
MI-KAN-KY   20000   16000   NA  NA  NA  2   6   NA  NA  NA
MI-NAL-BB   20000   35000   250000  NA  NA  1   1   1   NA  NA
MI-KAM-AL   30000   14000   13000   NA  NA  1   10  2   NA  NA

df <- df %>% mutate(
dagala_total_1 = dagala_price_1 * dagala_unit_nb_1,
dagala_total_2 = dagala_price_2 * dagala_unit_nb_2,
dagala_total_3 = dagala_price_3 * dagala_unit_nb_3, 
dagala_total_total =dagala_total_1 + dagala_total_2 + dagala_total_3)       

根据您的数据,您可以将其排列为长格式(tidyverse术语中的"整洁"(,这将为您提供更简单的代码。

我假设你有达加拉单位和五组1~5的价格,所以我在data.frame中添加了一个新的组变量,使其整洁,即以"长"形式

library(tidyr)
library(dplyr)
library(data.table)

df <- data.table::fread(
"code    dagala_price_1  dagala_price_2  dagala_price_3  dagala_price_4  dagala_price_5  dagala_unit_nb_1    dagala_unit_nb_2    dagala_unit_nb_3    dagala_unit_nb_4    dagala_unit_nb_5
MI-NAL-KA   50  15000   NA  NA  NA  100 1   NA  NA  NA
M-KK-KZ 10000   20000   NA  NA  NA  20  2   NA  NA  NA
M-KK-NK 10000   NA  NA  NA  NA  5   NA  NA  NA  NA
MI-NA-BA    12000   15000   NA  NA  NA  2   1   NA  NA  NA
MI-BD-BT    12000   15000   NA  NA  NA  3   1   NA  NA  NA
MI-MI-ND    12000   80000   NA  NA  NA  8   1   NA  NA  NA
MI-NAL-LT   13000   15000   NA  18000   NA  1   3   NA  1   NA
M-BY-BGY    13000   15000   NA  NA  NA  4   1   NA  NA  NA
MI-NA-NY    13000   NA  NA  NA  NA  2   NA  NA  NA  NA
MI-KAN-BL   18000   35000   15000   NA  NA  1   1   6   NA  NA
MI-KIGO-KR  20000   15000   15000   NA  NA  10  8   4   NA  NA
MI-KAN-KY   20000   16000   NA  NA  NA  2   6   NA  NA  NA
MI-NAL-BB   20000   35000   250000  NA  NA  1   1   1   NA  NA
MI-KAM-AL   30000   14000   13000   NA  NA  1   10  2   NA  NA"
)

df.price <- df %>%
select(code, matches("price_")) %>%
# gather price by group
gather(key=groups,value=dagala_price,matches("price_")) %>%
# extract last number as group
mutate(groups = gsub(".*(\d)$","\1",groups))
#> Warning: package 'bindrcpp' was built under R version 3.4.4

df.unit <- df %>%
select(code,matches("unit_nb")) %>%
# gather units by group
gather(key=groups,value=dagala_unit,matches("unit_")) %>%
# extract last number as group
mutate(groups = gsub(".*(\d)$","\1",groups))

df.tidy <- left_join(df.price,df.unit)
#> Joining, by = c("code", "groups")

这个df.tidy是"long"整洁形式,在tidyverse语法中更容易操作:

# Tidy data.frame
df.tidy
# A tibble: 70 x 4
code      groups dagala_price dagala_unit
<chr>     <chr>         <int>       <int>
1 MI-NAL-KA 1                50         100
2 M-KK-KZ   1             10000          20
3 M-KK-NK   1             10000           5
4 MI-NA-BA  1             12000           2
5 MI-BD-BT  1             12000           3
6 MI-MI-ND  1             12000           8
7 MI-NAL-LT 1             13000           1
8 M-BY-BGY  1             13000           4
9 MI-NA-NY  1             13000           2
10 MI-KAN-BL 1             18000           1
# ... with 60 more rows

然后,您的代码可以像这样简化:

# Then some summarise operations
df.total_by_grp <- df.tidy %>%
mutate(dagala_total = dagala_price * dagala_unit) 
# summarise by group
head(df.total_by_grp)
#>        code groups dagala_price dagala_unit dagala_total
#> 1 MI-NAL-KA      1           50         100         5000
#> 2   M-KK-KZ      1        10000          20       200000
#> 3   M-KK-NK      1        10000           5        50000
#> 4  MI-NA-BA      1        12000           2        24000
#> 5  MI-BD-BT      1        12000           3        36000
#> 6  MI-MI-ND      1        12000           8        96000

df.total_by_code <- df.tidy %>%
mutate(dagala_total = dagala_price * dagala_unit) %>%
group_by(code) %>%
summarise(code_total = sum(dagala_total,na.rm = TRUE))
# summarise by total
head(df.total_by_code)
#> # A tibble: 6 x 2
#>   code      code_total
#>   <chr>          <int>
#> 1 M-BY-BGY       67000
#> 2 M-KK-KZ       240000
#> 3 M-KK-NK        50000
#> 4 MI-BD-BT       51000
#> 5 MI-KAM-AL     196000
#> 6 MI-KAN-BL     143000

创建于2018-07-28由reprex包(v0.2.0(。

最新更新