查找两列相等但对每个分组变量的百分比

  • 本文关键字:变量 百分比 两列 查找 r
  • 更新时间 :
  • 英文 :


我想弄清楚order_ou和Rank都等于3的次数。然后我想把这个数字放在ordre_ou和rank不相等的次数上(其中一个等于3,但另一个值是1或2)。但是,让这变得困难的是,有时ordre_ou = 9。当它等于9时,这基本上意味着"不"。例如,如果ordre_ou = 9但rank = 3,这并不能说明任何问题。我想把这个ID算在等式里。

我知道这是一个令人困惑的情况,所以如果需要的话我会解释更多。基本上,order_ou是字段中的正确值,rank是计算机计算出的鸡蛋大小。我想看看现场工作和电脑的匹配率。所有东西都应该按ID分组。

dput(head(test, n= 100))

structure(list(ordre_ou = c("9", "9", "9", "9", "9", "9", "9", 
"9", "9", "9", "9", "9", "3", "9", "9", "9", "9", "9", "1", "2", 
"9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", 
"9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", 
"9", "9", "9", "1", "2", "9", "9", "9", "9", "9", "9", "1", "9", 
"9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", 
"3", "9", "9", "9", "9", "9", "3", "9", "9", "9", "9", "9", "9", 
"9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "9", "3", "9", 
"9", "9"), ID = c(65L, 65L, 65L, 65L, 88L, 88L, 88L, 201L, 201L, 
201L, 245L, 245L, 245L, 492L, 492L, 492L, 566L, 566L, 670L, 670L, 
704L, 704L, 704L, 753L, 753L, 753L, 784L, 784L, 784L, 819L, 819L, 
819L, 899L, 899L, 978L, 978L, 978L, 1060L, 1060L, 1060L, 1085L, 
1085L, 1085L, 1101L, 1101L, 1101L, 1235L, 1235L, 1235L, 1245L, 
1245L, 1269L, 1269L, 1269L, 1355L, 1355L, 1355L, 1356L, 1398L, 
1398L, 1398L, 1432L, 1432L, 1432L, 1458L, 1458L, 1458L, 1485L, 
1485L, 1485L, 1495L, 1495L, 1495L, 1505L, 1505L, 1505L, 1547L, 
1547L, 1547L, 1647L, 1647L, 1647L, 1657L, 1657L, 1688L, 1688L, 
1689L, 1689L, 1689L, 1698L, 1698L, 1708L, 1708L, 1708L, 1788L, 
1788L, 1788L, 1818L, 1818L, 1818L), PVC = c("2JA", "2JA", "2JA", 
"2JA", "378", "378", "378", "6180113", "6180113", "6180113", 
"6C9", "6C9", "6C9", "ABBM", "ABBM", "ABBM", "ACAD", "ACAD", 
"ADHY", "ADHY", "AFA2", "AFA2", "AFA2", "AFWY", "AFWY", "AFWY", 
"AH62", "AH62", "AH62", "AHJZ", "AHJZ", "AHJZ", "ALX7", "ALX7", 
"AMNH", "AMNH", "AMNH", "AP1D", "AP1D", "AP1D", "APLV", "APLV", 
"APLV", "APWU", "APWU", "APWU", "AT0F", "AT0F", "AT0F", "AT4Y", 
"AT4Y", "AT9F", "AT9F", "AT9F", "AV09", "AV09", "AV09", "AV1H", 
"AVH3", "AVH3", "AVH3", "AVZ1", "AVZ1", "AVZ1", "AWJR", "AWJR", 
"AWJR", "AXWA", "AXWA", "AXWA", "AXZ8", "AXZ8", "AXZ8", "AY45", 
"AY45", "AY45", "AZTN", "AZTN", "AZTN", "BABH", "BABH", "BABH", 
"BAHF", "BAHF", "BBHW", "BBHW", "BBJ2", "BBJ2", "BBJ2", "BBR7", 
"BBR7", "BC48", "BC48", "BC48", "BLU4", "BLU4", "BLU4", "BMWZ", 
"BMWZ", "BMWZ"), volume = c(59.23784990144, 57.67430439496, 55.941075465885, 
48.404429520525, 67.157961538635, 64.8180845235, 63.97980996672, 
68.91794748218, 58.15209427632, 57.52472936967, 66.667141436785, 
64.58676156675, 64.023665822545, 65.69135053824, 64.95949243106, 
63.717349423605, 57.7816829604, 57.75826384494, 67.7353109265, 
67.722914861455, 59.107361578275, 53.6827574912, 52.437236559625, 
64.64865510559, 60.092046898125, 55.65314064794, 65.9105613504, 
62.9811246456, 61.480030107375, 68.98102287872, 64.97415691434, 
62.0186388864, 63.249484535685, 62.914807201085, 60.94741873068, 
59.17492867088, 56.411384122335, 63.2272234135, 62.956485644075, 
59.157746675305, 57.32597558688, 55.434329712945, 54.3981319965, 
62.651453886945, 62.577372119625, 62.2632710628, 68.695014093745, 
64.7600712, 62.976206042835, 58.64841651305, 52.945914427435, 
64.2293729965, 60.22368881416, 60.125075029525, 67.72683495592, 
67.22997541, 60.662151860875, 60.95034911232, 67.22828506375, 
66.915609213375, 60.36643261348, 62.80147116288, 61.349764975875, 
56.381856553125, 65.138343264, 63.83578172832, 63.70508488183, 
65.374179521625, 64.57772389536, 61.54970130264, 68.11689275854, 
67.19495722272, 62.6687671468, 52.671580238965, 52.6496500866, 
49.0571970612, 65.35484747352, 62.99067352736, 56.28804189684, 
62.95487917906, 62.3909914575, 60.54756491358, 65.748093606, 
54.28226780316, 62.28287596851, 58.394627784, 58.54877398376, 
58.236602452875, 55.7946013138, 62.92579837536, 52.510981991085, 
69.18252370636, 67.45773857636, 56.322131567625, 60.486013583465, 
57.9529663461, 56.943494213215, 58.03166940876, 52.74560792151, 
51.3303436), rank = c(1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 3, 
1, 2, 3, 1, 2, 1, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 
1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 1, 2, 3, 1, 
2, 3, 1, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 
1, 2, 3, 1, 2, 3, 1, 2, 1, 2, 1, 2, 3, 1, 2, 1, 2, 3, 1, 2, 3, 
1, 2, 3), Year = c("2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016")), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
PVC = c("2JA", "378", "6180113", "6C9", "ABBM", "ACAD", "ADHY", 
"AFA2", "AFWY", "AH62", "AHJZ", "ALX7", "AMNH", "AP1D", "APLV", 
"APWU", "AT0F", "AT4Y", "AT9F", "AV09", "AV1H", "AVH3", "AVZ1", 
"AWJR", "AXWA", "AXZ8", "AY45", "AZTN", "BABH", "BAHF", "BBHW", 
"BBJ2", "BBR7", "BC48", "BLU4", "BMWZ"), Year = c("2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016"), .rows = structure(list(1:4, 5:7, 
8:10, 11:13, 14:16, 17:18, 19:20, 21:23, 24:26, 27:29, 
30:32, 33:34, 35:37, 38:40, 41:43, 44:46, 47:49, 50:51, 
52:54, 55:57, 58L, 59:61, 62:64, 65:67, 68:70, 71:73, 
74:76, 77:79, 80:82, 83:84, 85:86, 87:89, 90:91, 92:94, 
95:97, 98:100), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -36L), .drop = TRUE))

使用以下代码,您将获得匹配计数,不匹配计数和总数,并使用百分比列表示正确的百分比率。我添加了一个注释行,用于过滤掉ordre_ou等于9的行,因为我不完全确定您所说的像NA是什么意思。

library(magrittr)
library(dplyr)
test %>% 
#filter(ordre_ou != 9) %>% 
group_by(ID) %>% 
summarise(
correct = sum(ordre_ou == 3 & rank == 3),
wrong = sum(!(ordre_ou == 3 & rank == 3)),
total = n()
) %>% 
mutate(
perc = correct/total
)

最新更新