gl <- tibble::tribble(
~X4010, ~X4020, ~X4030, ~X5010, ~X5020, ~X5030, ~X5040,
1000, 100, 10, 500, 100, 50, 5,
1100, 110, 11, 550, 110, 55, 5.5,
1210, 121, 12.1, 605, 121, 60.5, 6.05,
1331, 133.1, 13.31, 665.5, 133.1, 66.55, 6.65,
1464.1, 146.41, 14.64, 732.05, 146.41, 73.2, 7.32,
1610.51, 161.05, 16.1, 805.25, 161.05, 80.52, 8.05,
1771.56, 177.15, 17.71, 885.78, 177.15, 88.57, 8.85,
1948.71, 194.87, 19.48, 974.35, 194.87, 97.43, 9.74,
2143.58, 214.35, 21.43, 1071.79, 214.35, 107.17, 10.71,
2357.94, 235.79, 23.57, 1178.97, 235.79, 117.89, 11.78
)
需要创建两个计算列,按行添加列,如下所示:
gl %>%
mutate(C1 = X4010+X4020+X4030, C2 = X5010+X5020+X5030+X5040 )
但是,由于X变量的数量非常大,我不想添加单独的列,而是希望指定范围,例如X4010:X4030
或select(starts_with("X40"))
。
如何修改C1
和C2
的公式以使用范围或模式?谢谢
所需输出如下:
result <- tibble::tribble(
~X4010, ~X4020, ~X4030, ~X5010, ~X5020, ~X5030, ~X5040, ~C1, ~C2,
1000, 100, 10, 500, 100, 50, 5, 1110, 655,
1100, 110, 11, 550, 110, 55, 5.5, 1221, 720.5,
1210, 121, 12.1, 605, 121, 60.5, 6.05, 1343.1, 792.55,
1331, 133.1, 13.31, 665.5, 133.1, 66.55, 6.65, 1477.41, 871.8,
1464.1, 146.41, 14.64, 732.05, 146.41, 73.2, 7.32, 1625.15, 958.98,
1610.51, 161.05, 16.1, 805.25, 161.05, 80.52, 8.05, 1787.66, 1054.87,
1771.56, 177.15, 17.71, 885.78, 177.15, 88.57, 8.85, 1966.42, 1160.35,
1948.71, 194.87, 19.48, 974.35, 194.87, 97.43, 9.74, 2163.06, 1276.39,
2143.58, 214.35, 21.43, 1071.79, 214.35, 107.17, 10.71, 2379.36, 1404.02,
2357.94, 235.79, 23.57, 1178.97, 235.79, 117.89, 11.78, 2617.3, 1544.43
)
我希望这就是您想要的:
library(dplyr)
gl %>%
rowwise() %>%
mutate(C1 = sum(c_across(starts_with("X40"))),
C2 = sum(c_across(starts_with("X50"))))
# A tibble: 10 x 9
# Rowwise:
X4010 X4020 X4030 X5010 X5020 X5030 X5040 C1 C2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1000 100 10 500 100 50 5 1110 655
2 1100 110 11 550 110 55 5.5 1221 720.
3 1210 121 12.1 605 121 60.5 6.05 1343. 793.
4 1331 133. 13.3 666. 133. 66.6 6.65 1477. 872.
5 1464. 146. 14.6 732. 146. 73.2 7.32 1625. 959.
6 1611. 161. 16.1 805. 161. 80.5 8.05 1788. 1055.
7 1772. 177. 17.7 886. 177. 88.6 8.85 1966. 1160.
8 1949. 195. 19.5 974. 195. 97.4 9.74 2163. 1276.
9 2144. 214. 21.4 1072. 214. 107. 10.7 2379. 1404.
10 2358. 236. 23.6 1179. 236. 118. 11.8 2617. 1544.
使用split.default
-
cbind(gl, sapply(split.default(gl, sub('(X\d).*', '\1', names(gl))), rowSums))
# X4010 X4020 X4030 X5010 X5020 X5030 X5040 X4 X5
#1 1000.00 100.00 10.00 500.00 100.00 50.00 5.00 1110.00 655.00
#2 1100.00 110.00 11.00 550.00 110.00 55.00 5.50 1221.00 720.50
#3 1210.00 121.00 12.10 605.00 121.00 60.50 6.05 1343.10 792.55
#4 1331.00 133.10 13.31 665.50 133.10 66.55 6.65 1477.41 871.80
#5 1464.10 146.41 14.64 732.05 146.41 73.20 7.32 1625.15 958.98
#6 1610.51 161.05 16.10 805.25 161.05 80.52 8.05 1787.66 1054.87
#7 1771.56 177.15 17.71 885.78 177.15 88.57 8.85 1966.42 1160.35
#8 1948.71 194.87 19.48 974.35 194.87 97.43 9.74 2163.06 1276.39
#9 2143.58 214.35 21.43 1071.79 214.35 107.17 10.71 2379.36 1404.02
#10 2357.94 235.79 23.57 1178.97 235.79 117.89 11.78 2617.30 1544.43
我们还可以用pivot_longer
将其重塑为"long"格式,并获得每列的sum
,并将其绑定到原始数据
library(dplyr)
library(tidyr)
gl %>%
mutate(rn = row_number()) %>%
pivot_longer(cols = -rn, names_to = c('.value', 'grp'),
names_pattern = '^(X\d)(.*)') %>%
group_by(rn) %>%
summarise(across(starts_with('X'), sum, na.rm = TRUE)) %>%
select(-rn) %>%
bind_cols(gl, .)
# A tibble: 10 x 9
# X4010 X4020 X4030 X5010 X5020 X5030 X5040 X4 X5
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1000 100 10 500 100 50 5 1110 655
# 2 1100 110 11 550 110 55 5.5 1221 720.
# 3 1210 121 12.1 605 121 60.5 6.05 1343. 793.
# 4 1331 133. 13.3 666. 133. 66.6 6.65 1477. 872.
# 5 1464. 146. 14.6 732. 146. 73.2 7.32 1625. 959.
# 6 1611. 161. 16.1 805. 161. 80.5 8.05 1788. 1055.
# 7 1772. 177. 17.7 886. 177. 88.6 8.85 1966. 1160.
# 8 1949. 195. 19.5 974. 195. 97.4 9.74 2163. 1276.
# 9 2144. 214. 21.4 1072. 214. 107. 10.7 2379. 1404.
#10 2358. 236. 23.6 1179. 236. 118. 11.8 2617. 1544.