我搜索一种有效的方法来组合这两个数据帧: 一个包含问题及其答案和关联点(按行组织的问题(
answer <- data.frame(num_question = c("X01","X02","X03","X04"),
ans = c("A","C","B","C"),
point = c(1,2,1,0.5))
num_question ans point
X01 A 1.0
X02 C 2.0
X03 B 1.0
X04 C 0.5
另一个是候选人的数量和他们的答案(按列组织的问题(
extract <- data.frame(cand = c("can1","can2","can3"),
X01 = c("A","A","B"),
X02 = c("B","C","C"),
X03 = c("B","B","B"),
X04 = c("C","C","A"))
cand X01 X02 X03 X04
can1 A B B C
can2 A C B C
can3 B C B A
如何达到下一个结果?数据框的维度与第二个数据框相同,但我们将找到位于数据框答案中的点,而不是答案。
期望输出:
result_research <- data.frame(cand = c("can1","can2","can3"),
X01 = c(1,1,0),
X02 = c(0,2,2),
X03 = c(1,1,1),
X04 = c(0.5,0.5,0))
cand X01 X02 X03 X04
can1 1 0 1 0.5
can2 1 2 1 0.5
can3 0 2 1 0.0
提前非常感谢
这也可以使用apply
对基本 R 来完成:
extract[, -1] <- t(apply(extract[, -1], 1, function(x)
ifelse(x[match(names(x), answer$num_question)] == answer$ans, answer$point, 0)
))
extract
#> cand X01 X02 X03 X04
#> 1 can1 1 0 1 0.5
#> 2 can2 1 2 1 0.5
#> 3 can3 0 2 1 0.0
对于extract
中的每一行(即每个候选人(,使用match
匹配 data.frame 之间的问题编号。如果给定的答案与正确答案一致,则返回关联的点,否则返回零。
或者不使用apply
仅使用矢量化操作:
answer <- answer[match(answer$num_question, names(extract)[-1]), ]
extract[, -1] <- t((t(extract[, -1]) == answer$ans) * answer$point)
extract
#> cand X01 X02 X03 X04
#> 1 can1 1 0 1 0.5
#> 2 can2 1 2 1 0.5
#> 3 can3 0 2 1 0.0
library(tidyverse)
tmp <- extract %>%
gather(num_question, can_ans, -cand) %>% # turn the extract data.frame into long format
left_join(answer, by="num_question") %>% #merge extract by question number into a single data.frame
mutate(correct = (can_ans == ans)+0) %>% # is candidate answer the same as the correct answer (1 = TRUE, 0 = FALSE)
mutate(result = correct*point) # multiply correct answer (1) with points given
#turn the data.frame into wide format
tmp %>%
select(cand, num_question, result) %>%
spread(num_question,result)
# cand X01 X02 X03 X04
# 1 can1 1 0 1 0.5
# 2 can2 1 2 1 0.5
# 3 can3 0 2 1 0.0
这是一个基本的 R 方法,具有stack
和unstack
,即
d1 <- stack(extract[-1])
d1$values <- answer$point[match(do.call(paste, d1), paste(answer$ans, answer$num_question))]
d1$values <- replace(d1$values, is.na(d1$values), 0)
cbind.data.frame(cand = extract$cand, unstack(d1))
# cand X01 X02 X03 X04
#1 can1 1 0 1 0.5
#2 can2 1 2 1 0.5
#3 can3 0 2 1 0.0
我们可以使用dplyr
和tidyr
将数据gather
为长格式,left_join
num_question
和ans
,将NA
s替换为0,并将数据spread
为宽格式。
library(dplyr)
library(tidyr)
extract %>%
gather(key, value, -cand) %>%
left_join(answer, by = c("key" = "num_question", "value" = "one_answers")) %>%
replace_na(list(point = 0)) %>%
select(-value) %>%
spread(key, point)
# cand X01 X02 X03 X04
#1 can1 1 0 1 0.5
#2 can2 1 2 1 0.5
#3 can3 0 2 1 0.0
library(dplyr)
library(reshape2)
dataQA <- melt(extract, id ="cand") %>% #long format for extract
rename(num_question = variable, ans = value) %>%
left_join(answer) %>% #merge dataframes
mutate(point = ifelse(is.na(point), 0, point)) %>% #wrong answer = 0
select(cand, num_question, point) %>%
dcast(cand ~ num_question) #back to wide format
输出:
cand X01 X02 X03 X04
1 can1 1 0 1 0.5
2 can2 1 2 1 0.5
3 can3 0 2 1 0.0