我有一个如下所示的数据框,其中包含 NA 值:
| id | cat1 | cat2 | cat3 | cat4 |
|----|------|------|------|------|
| 1  | 苹果 | 香蕉 | 香蕉 | 橙色 |
| 2  | 橙色 | 香蕉 | 苹果 | 橙色 |
| 3  | 苹果 | NA   | NA   | 橙色 |
| 4  | 橙色 | 香蕉 | 苹果 | NA   |
我们可以使用此处给出的 Mode 函数:
#' Most frequent non-missing value of a vector
#'
#' @param x A vector; missing values are discarded before counting.
#' @return The value of `x` that occurs most often. When several values are
#'   tied, the one that appears first in `x` is returned.
Mode <- function(x) {
  observed <- na.omit(x)
  candidates <- unique(observed)
  # Map every observation to its candidate index, then count per candidate.
  counts <- tabulate(match(observed, candidates))
  candidates[which.max(counts)]
}
并将其应用到每一行:
# Keep the id column and attach the per-row mode of the remaining columns.
cbind(
  df[1],
  cat = apply(df[-1], MARGIN = 1, FUN = Mode)
)
# id cat
#1 1 banana
#2 2 orange
# Two-row sample data: an integer id plus four character category columns.
df <- data.frame(
  id = 1:2,
  cat1 = c("apple", "orange"),
  cat2 = c("banana", "banana"),
  cat3 = c("banana", "apple"),
  cat4 = c("orange", "orange"),
  stringsAsFactors = FALSE
)
data.table 方案:
# Convert df to a data.table by reference, then for each id tabulate the
# non-missing category values and keep the name of the largest count.
setDT(df)[
  ,
  {
    counts <- sort(table(na.omit(unlist(.SD))))
    .(cat = names(tail(counts, 1)))
  },
  by = id
]
结果为:
id cat
1: 1 banana
2: 2 orange
使用 apply 的 base R 方案:
# Keep the id column and attach, for every row, the category with the highest
# count among the non-missing values.
cbind(
  df[1],
  cat = apply(df[-1], MARGIN = 1, FUN = function(row_values) {
    counts <- table(na.omit(unlist(row_values)))
    # sort() is ascending, so the last entry carries the largest count.
    names(tail(sort(counts), 1))
  })
)
结果为:
id cat
1 1 banana
2 2 orange
> dput(df)
structure(list(id = 1:2, cat1 = c("apple", "orange"), cat2 = c("banana",
"banana"), cat3 = c("banana", "apple"), cat4 = c("orange", "orange"
)), class = "data.frame", row.names = c(NA, -2L))
这样可以吗?
library(dplyr)
library(tidyr)
# Reshape to long form, count each (id, value) pair, and keep the most frequent
# value per id.
# Missing categories are dropped during the reshape: count() would otherwise
# tally NA as a value of its own, and a row holding more NAs than any real
# category would report NA as its mode.
# NOTE: an id whose categories are all NA has no rows left after the reshape
# and therefore does not appear in the result.
df %>%
  pivot_longer(cols = -id, values_drop_na = TRUE) %>%
  count(id, value) %>%
  group_by(id) %>%
  summarise(cat = value[which.max(n)])
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 2 x 2
id cat
<int> <chr>
1 1 banana
2 2 orange
使用tidyverse
library(dplyr)
library(purrr)
# For every row, gather the category columns into one vector and take its mode.
df %>%
  transmute(
    id,
    cat = pmap_chr(.[-1], function(...) Mode(c(...)))
  )
# id cat
#1 1 banana
#2 2 orange
其中,Mode 函数定义为:
#' Most frequent non-missing value of a vector
#'
#' @param x A vector; missing values are discarded before counting.
#' @return The value of `x` that occurs most often. When several values are
#'   tied, the one that appears first in `x` is returned.
Mode <- function(x) {
  observed <- na.omit(x)
  candidates <- unique(observed)
  # Map every observation to its candidate index, then count per candidate.
  counts <- tabulate(match(observed, candidates))
  candidates[which.max(counts)]
}
# Data
# Two-row sample data frame used by the examples: an integer id plus four
# character category columns.
df <- structure(
  list(
    id = 1:2,
    cat1 = c("apple", "orange"),
    cat2 = c("banana", "banana"),
    cat3 = c("banana", "apple"),
    cat4 = c("orange", "orange")
  ),
  class = "data.frame",
  row.names = c(NA, -2L)
)