假设我们有这个向量:
# The product codes are character data, so each one is quoted; bare names such
# as `a` would be looked up as variables and fail with "object not found".
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")
下面是这样一个数据帧:
seller_a seller_b seller_c
a b d
d d e
g g g
h l h
t n t
z y w
我想在数据框中增加一行,用来表示每个 seller 列与 products 向量的匹配程度(即每一列中有多少个元素出现在 products 中)。
换句话说,我的目标是使原始数据帧看起来像这样:
seller_a seller_b seller_c
6 3 4
a b d
d d e
g g g
h l h
t n t
z y w
使用 dplyr 中的 summarise 和 across:
library(dplyr)
# Build a one-row summary holding, for every column, the number of entries
# found in `products` (cast to character), then stack it on top of DF.
bind_rows(
  summarise(
    DF,
    across(everything(), function(col) as.character(sum(col %in% products)))
  ),
  DF
)
#> seller_a seller_b seller_c
#> 1 6 3 4
#> 2 a b d
#> 3 d d e
#> 4 g g g
#> 5 h l h
#> 6 t n t
#> 7 z y w
由 reprex 包(v2.0.0)创建于 2021-06-07
您还可以使用tibble
中的add_row
函数向数据集添加额外的行:
library(dplyr)
# Insert a single row of per-column match counts (as character values)
# ahead of the current first row.
add_row(
  df,
  seller_a = as.character(sum(df[["seller_a"]] %in% products)),
  seller_b = as.character(sum(df[["seller_b"]] %in% products)),
  seller_c = as.character(sum(df[["seller_c"]] %in% products)),
  .before = 1
)
# A tibble: 7 x 3
seller_a seller_b seller_c
<chr> <chr> <chr>
1 6 3 4
2 a b d
3 d d e
4 g g g
5 h l h
6 t n t
7 z y w
使用末尾“备注”部分中以可重现形式给出的输入:
# For each column, put the number of entries found in `products` in front of
# the column's own values, then reassemble the columns into a data frame.
as.data.frame(
  Map(function(col) c(sum(col %in% products), col), DF)
)
## seller_a seller_b seller_c
## 1 6 3 4
## 2 a b d
## 3 d d e
## ...snip...
数值向量
但是,同一列的所有元素必须是相同的类型,因此上面的数字会被强制转换为字符。您可能更愿意单独创建一个数值向量。
# Named integer vector: for each column, how many entries occur in `products`.
# vapply() pins the result type, so a zero-column input yields integer(0)
# rather than the empty list that sapply() would return.
vapply(DF, function(col) sum(col %in% products), integer(1))
## seller_a seller_b seller_c
## 6 3 4
S3
这可能有些小题大做,但也可以创建一个新的 S3 类:把各列的匹配计数存储为数值属性而不是数据行,只在打印时将其显示为第一行。
# Generic: convert `x` to a "data.frame1", i.e. a data frame that carries
# per-column match counts in its "product" attribute.
as.data.frame1 <- function(x, ...) UseMethod("as.data.frame1")

# Method for data frames.
#   x       - data frame whose columns are compared against `product`.
#   product - vector of reference values to look up.
# Returns `x` with class "data.frame1" prepended and a "product" attribute
# holding, for each column, the number of its entries found in `product`.
as.data.frame1.data.frame <- function(x, product, ...) {
  # Count against the arguments that were passed in, not against globals.
  counts <- vapply(x, function(col) sum(col %in% product), integer(1))
  out <- structure(x, class = c("data.frame1", class(x)))
  attr(out, "product") <- counts
  out
}
# Format method for "data.frame1": places the stored "product" counts above the
# data rows, converts the result to a plain data frame and formats that.
format.data.frame1 <- function(x, ...) {
  counts <- attr(x, "product")
  stacked <- rbind(counts, x)
  format(as.data.frame(stacked))
}
# Print method for "data.frame1": prints the formatted representation of `x`,
# forwarding `...` to print(). Returns `x` invisibly, as print methods
# conventionally do, so the original object (not its formatted copy) is what
# callers get back.
print.data.frame1 <- function(x, ...) {
  print(format(x), ...)
  invisible(x)
}
DF1 <- as.data.frame1(DF, products)
DF1
## seller_a seller_b seller_c
## 1 6 3 4
## 2 a b d
## 3 d d e
## ...snip...
attr(DF1, "product") # numeric vector
## seller_a seller_b seller_c
## 6 3 4
as.data.frame(DF1)
## seller_a seller_b seller_c
## 1 a b d
## 2 d d e
## 3 g g g
## ...snip...
备注
# Reproducible input, part 1: the product codes, parsed from a
# comma-separated string with surrounding white space stripped.
products <- scan(
  text = "a, b, d, f, g, h, i, j, m, o, t, z",
  what = "",
  sep = ",",
  strip.white = TRUE
)

# Reproducible input, part 2: the seller table, kept as text and parsed with
# its first line used as the header.
Lines <- "seller_a seller_b seller_c
a b d
d d e
g g g
h l h
t n t
z y w"
DF <- read.table(header = TRUE, text = Lines)
数据:
# Reference product codes.
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")

# One character column per seller, six entries each.
df <- tibble(
  a = c("a", "d", "g", "h", "t", "z"),
  b = c("b", "d", "g", "l", "n", "y"),
  c = c("d", "e", "g", "h", "t", "w")
)
代码:
library(tidyverse)
# Per column, count how many product codes appear in it, and bind that row of
# counts above the original rows.
rbind(
  map_int(df, function(col) sum(products %in% col)),
  df
)
a b c
<chr> <chr> <chr>
1 6 3 4
2 a b d
3 d d e
4 g g g
5 h l h
6 t n t
7 z y w
请注意,第一行中的数字将是字符型。此外,如果列是因子(factor),这段代码将无法工作(这就是我使用 tibble 的原因)。
使用 data.table 的解决方案:
library(data.table)
# Reference product codes.
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")

# One character column per seller.
DT <- data.table(
  seller_a = c("a", "d", "g", "h", "t", "z"),
  seller_b = c("b", "d", "g", "l", "n", "y"),
  seller_c = c("d", "e", "g", "h", "t", "w")
)

# One-row table with, for every column of DT, the number of product codes that
# appear in it. Iterating over .SD covers all columns without naming each one,
# and sum() over the logical vector replaces length(which(... == TRUE)).
DT1 <- DT[, lapply(.SD, function(col) sum(products %in% col))]
# -------------------
> DT1
seller_a seller_b seller_c
1: 6 3 4
> rbind(DT1, DT)
seller_a seller_b seller_c
1: 6 3 4
2: a b d
3: d d e
4: g g g
5: h l h
6: t n t
7: z y w
基础 R 方案:
# Bind a row of per-column match counts above the original rows.
# vapply() guarantees an integer vector of counts regardless of input shape,
# whereas sapply() changes its return type on empty input.
rbind(
  vapply(df, function(col) sum(col %in% products), integer(1)),
  df
)
# a b c
#1 6 3 4
#2 a b d
#3 d d e
#4 g g g
#5 h l h
#6 t n t
#7 z y w
我们可以使用
library(dplyr)
# Replace every column by "count of product codes found in it" followed by the
# column's own values, which yields one more row than the input.
# NOTE(review): dplyr >= 1.1.0 deprecates summarise() results with more than
# one row per group in favour of reframe() - confirm against the installed
# version.
summarise(
  df,
  across(everything(), function(col) c(sum(products %in% col), col))
)
-输出
# A tibble: 7 x 3
a b c
<chr> <chr> <chr>
1 6 3 4
2 a b d
3 d d e
4 g g g
5 h l h
6 t n t
7 z y w
使用基础 R,您可以这样做:
# ---- Example data ----
products <- c("a", "b", "d", "f", "g", "h", "i", "j", "m", "o", "t", "z")
seller_a <- c("a", "d", "g", "h", "t", "z")
seller_b <- c("b", "d", "g", "l", "n", "y")
seller_c <- c("d", "e", "g", "h", "t", "w")

# Build the table directly with data.frame() instead of
# as.data.frame(cbind(...)): cbind() first collapses the vectors into a
# character matrix, and before R 4.0 converting that matrix defaulted to
# factor columns, which cannot absorb the count values added below.
d <- data.frame(seller_a, seller_b, seller_c, stringsAsFactors = FALSE)

# ---- Prepend the per-column match counts ----
# `a` holds, for each column of `d`, how many entries occur in `products`.
a <- unname(vapply(d, function(col) sum(col %in% products), integer(1)))
# The counts are coerced to character when bound onto the character columns.
d <- rbind(a, d)