我有一个数据集,其中每个值的像素数如下所示:
HISTO_2 HISTO_3 HISTO_4 HISTO_5 HISTO_6 HISTO_7 HISTO_10 HISTO_11 HISTO_14 HISTO_18 HISTO_19 HISTO_23
1 0 390 652 157 32 7 0 0 0 0 0 0
2 0 22 41 27 23 11 8 5 4 11 2 4
3 0 916 671 167 40 7 4 5 2 1 2 2
4 0 2600 810 172 38 0 0 0 0 0 0 0
5 0 110 987 791 248 59 11 5 0 1 0 0
6 0 778 808 182 43 5 0 0 0 0 0 0
7 0 1095 846 199 55 12 8 3 0 0 0 0
8 0 1045 545 60 0 0 0 0 0 0 0 0
9 0 868 422 92 2 0 0 0 0 0 0 0
10 0 1225 597 160 57 27 0 0 0 0 0 0
11 0 1092 1096 635 150 33 0 0 0 0 0 0
HISTO_2表示值为2的像素数,HISTO_3表示值为3的像素数,以此类推。我需要找到一种方法,能够高效地计算每一行所有像素的总值。准确地说,列HISTO_3的每个值必须乘以3,列HISTO_4的每个值必须乘以4,以此类推,然后才能计算总和。这需要对10个数据集进行处理。从表中可以看出,列名中的数值并不构成等间隔的序列,而且每个数据集的序列可能不同。
我的问题有什么有效的解决方案吗?PS:如果你对我的问题有更好的标题,请随意编辑:)
由于您对行像素的TOTAL VALUE感兴趣,您可以这样做:
Base R:
# Row totals: weight each HISTO_<k> column by k (the digits in its name).
# t(df) has one row per column of df, so the weight vector lines up with rows
# and colSums() then yields one total per original row.
# "\\D" is the regex \D (non-digit); backslashes must be doubled in R strings.
colSums(t(df) * as.numeric(gsub("\\D", "", names(df))))
1 2 3 4 5 6 7 8 9 10 11
4804 1099 6781 12128 10317 6769 8191 5615 4764 7394 11966
甚至:
# Same totals as a matrix product: (rows x columns) %*% (per-column weights).
# "\\D" is the regex \D (non-digit); backslashes must be doubled in R strings.
as.matrix(df) %*% as.numeric(gsub("\\D", "", names(df)))
[,1]
1 4804
2 1099
3 6781
4 12128
5 10317
6 6769
7 8191
8 5615
9 4764
10 7394
11 11966
如果您不熟悉 gsub 和 \D(即正则表达式),可以改用:
# Regex-free variant: parse_number() extracts the number from each column
# name, and the matrix product gives one weighted total per row.
library(readr)
as.matrix(df) %*% parse_number(names(df))
[,1]
1 4804
2 1099
3 6781
4 12128
5 10317
6 6769
7 8191
8 5615
9 4764
10 7394
11 11966
使用 dplyr 和 tidyr(后者用于 unnest_wider):
编辑:首先从变量名中提取乘数,然后用 unnest_wider 将该列表列展开为多列,以便通过 across() 将两组列逐列相乘。
library(dplyr)
library(tidyr)
# Multiply every HISTO_<k> column by k and append the per-row total.
# NOTE: on dplyr >= 1.1.0 a multi-row summarize() emits a deprecation
# warning; reframe() is the documented replacement.
df %>%
  # Numeric suffix of each column name. Backslashes must be doubled inside
  # an R string literal ("\\d", "\\1"), otherwise the code does not parse.
  mutate(mult = list(as.numeric(sub(".*_(\\d+)$", "\\1", colnames(.))))) %>%
  # Spread the multipliers into columns mult_1, mult_2, ...
  unnest_wider(mult, names_sep = "_") %>%
  # Data-frame product is positional: i-th HISTO column * i-th mult column.
  summarize(across(starts_with("HIST")) * across(starts_with("mult"))) %>%
  rowwise() %>%
  mutate(total = sum(c_across(everything()))) %>%
  ungroup()
# A tibble: 11 × 13
HISTO_2 HISTO_3 HISTO_4 HISTO_5 HISTO_6 HISTO_7 HISTO_10 HISTO_11 HISTO_14
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 1170 2608 785 192 49 0 0 0
2 0 66 164 135 138 77 80 55 56
3 0 2748 2684 835 240 49 40 55 28
4 0 7800 3240 860 228 0 0 0 0
5 0 330 3948 3955 1488 413 110 55 0
6 0 2334 3232 910 258 35 0 0 0
7 0 3285 3384 995 330 84 80 33 0
8 0 3135 2180 300 0 0 0 0 0
9 0 2604 1688 460 12 0 0 0 0
10 0 3675 2388 800 342 189 0 0 0
11 0 3276 4384 3175 900 231 0 0 0
HISTO_18 HISTO_19 HISTO_23 total
<dbl> <dbl> <dbl> <dbl>
1 0 0 0 4804
2 198 38 92 1099
3 18 38 46 6781
4 0 0 0 12128
5 18 0 0 10317
6 0 0 0 6769
7 0 0 0 8191
8 0 0 0 5615
9 0 0 0 4764
10 0 0 0 7394
11 0 0 0 11966
对于所有10个数据集
# Apply the same weighting to each of the 10 data sets; lapply() returns a
# list of results in the same order as df_list.
df_list <- list(df1, df2, df3, df4, df5, df6, df7, df8, df9, df10)
lapply(df_list, function(x) {
  x %>%
    # Numeric suffix of each column name. Backslashes must be doubled inside
    # an R string literal ("\\d", "\\1"), otherwise the code does not parse.
    mutate(mult = list(as.numeric(sub(".*_(\\d+)$", "\\1", colnames(.))))) %>%
    unnest_wider(mult, names_sep = "_") %>%
    # Positional product: i-th HISTO column * i-th mult column.
    summarize(across(starts_with("HIST")) * across(starts_with("mult"))) %>%
    rowwise() %>%
    mutate(total = sum(c_across(everything()))) %>%
    ungroup()
})
# Sample data: 11 rows (named "1".."11"), one integer column per HISTO_<value>
# holding the pixel count for that value.
df <- structure(
  list(
    HISTO_2 = integer(11L),
    HISTO_3 = c(
      390L, 22L, 916L, 2600L, 110L, 778L, 1095L, 1045L, 868L, 1225L, 1092L
    ),
    HISTO_4 = c(
      652L, 41L, 671L, 810L, 987L, 808L, 846L, 545L, 422L, 597L, 1096L
    ),
    HISTO_5 = c(157L, 27L, 167L, 172L, 791L, 182L, 199L, 60L, 92L, 160L, 635L),
    HISTO_6 = c(32L, 23L, 40L, 38L, 248L, 43L, 55L, 0L, 2L, 57L, 150L),
    HISTO_7 = c(7L, 11L, 7L, 0L, 59L, 5L, 12L, 0L, 0L, 27L, 33L),
    HISTO_10 = c(0L, 8L, 4L, 0L, 11L, 0L, 8L, 0L, 0L, 0L, 0L),
    HISTO_11 = c(0L, 5L, 5L, 0L, 5L, 0L, 3L, 0L, 0L, 0L, 0L),
    HISTO_14 = c(0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
    HISTO_18 = c(0L, 11L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L),
    HISTO_19 = c(0L, 2L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
    HISTO_23 = c(0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
  ),
  class = "data.frame",
  row.names = as.character(seq_len(11L))
)
# Sample data (repeats the `df` definition directly above, value for value):
# 11 rows named "1".."11", one integer column per HISTO_<value>.
df <- structure(list(HISTO_2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), HISTO_3 = c(390L, 22L, 916L, 2600L, 110L, 778L, 1095L,
1045L, 868L, 1225L, 1092L), HISTO_4 = c(652L, 41L, 671L, 810L,
987L, 808L, 846L, 545L, 422L, 597L, 1096L), HISTO_5 = c(157L,
27L, 167L, 172L, 791L, 182L, 199L, 60L, 92L, 160L, 635L), HISTO_6 = c(32L,
23L, 40L, 38L, 248L, 43L, 55L, 0L, 2L, 57L, 150L), HISTO_7 = c(7L,
11L, 7L, 0L, 59L, 5L, 12L, 0L, 0L, 27L, 33L), HISTO_10 = c(0L,
8L, 4L, 0L, 11L, 0L, 8L, 0L, 0L, 0L, 0L), HISTO_11 = c(0L, 5L,
5L, 0L, 5L, 0L, 3L, 0L, 0L, 0L, 0L), HISTO_14 = c(0L, 4L, 2L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), HISTO_18 = c(0L, 11L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L), HISTO_19 = c(0L, 2L, 2L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), HISTO_23 = c(0L, 4L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11"))