我的第一个 for 循环有问题:它本应统计数据帧 X 的各行在数据帧 Y 中出现的次数



我有这两个数据帧:

# Example data: `df1` is a data frame with 109 rows (see `row.names` at the
# end of the literal) and two character columns, `Measures` and `Format`.
# The literal is in the form produced by `dput()`.
df1<-structure(list(Measures = c("space and shape", "space and shape", 
"space and shape", "space and shape", "space and shape", "change and relationships", 
"change and relationships", "change and relationships", "change and relationships", 
"change and relationships", "space and shape", "space and shape", 
"space and shape", "space and shape", "uncertainty and data", 
"quantity", "uncertainty and data", "uncertainty and data", "uncertainty and data", 
"quantity", "change and relationships", "change and relationships", 
"space and shape", "space and shape", "space and shape", "quantity", 
"quantity", "quantity", "quantity", "quantity", "uncertainty and data", 
"change and relationships", "quantity", "quantity", "uncertainty and data", 
"change and relationships", "uncertainty and data", "quantity", 
"change and relationships", "change and relationships", "quantity", 
"quantity", "quantity", "quantity", "quantity", "quantity", "change and relationships", 
"uncertainty and data", "change and relationships", "uncertainty and data", 
"uncertainty and data", "uncertainty and data", "quantity", "quantity", 
"quantity", "space and shape", "change and relationships", "quantity", 
"space and shape", "space and shape", "change and relationships", 
"change and relationships", "uncertainty and data", "uncertainty and data", 
"quantity", "change and relationships", "quantity", "change and relationships", 
"space and shape", "quantity", "quantity", "quantity", "space and shape", 
"space and shape", "space and shape", "uncertainty and data", 
"uncertainty and data", "uncertainty and data", "change and relationships", 
"change and relationships", "change and relationships", "uncertainty and data", 
"uncertainty and data", "uncertainty and data", "change and relationships", 
"change and relationships", "change and relationships", "change and relationships", 
"change and relationships", "uncertainty and data", "space and shape", 
"space and shape", "uncertainty and data", "uncertainty and data", 
"uncertainty and data", "uncertainty and data", "uncertainty and data", 
"quantity", "quantity", "space and shape", "space and shape", 
"space and shape", "space and shape", "change and relationships", 
"space and shape", "space and shape", "quantity", "change and relationships", 
"change and relationships"), Format = c("Constructed Response Expert", 
"Constructed Response Manual", "Constructed Response Expert", 
"Simple Multiple Choice", "Constructed Response Auto-coded", 
"Constructed Response Expert", "Constructed Response Expert", 
"Constructed Response Expert", "Complex Multiple Choice", "Complex Multiple Choice", 
"Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert", 
"Constructed Response Expert", "Complex Multiple Choice", "Constructed Response Manual", 
"Simple Multiple Choice", "Complex Multiple Choice", "Simple Multiple Choice", 
"Constructed Response Manual", "Constructed Response Manual", 
"Constructed Response Expert", "Simple Multiple Choice", "Constructed Response Expert", 
"Constructed Response Auto-coded", "Constructed Response Manual", 
"Complex Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice", 
"Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
"Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Auto-coded", 
"Constructed Response Expert", "Constructed Response Manual", 
"Constructed Response Manual", "Constructed Response Expert", 
"Constructed Response Manual", "Complex Multiple Choice", "Constructed Response Expert", 
"Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual", 
"Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice", 
"Constructed Response Manual", "Simple Multiple Choice", "Simple Multiple Choice", 
"Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Manual", 
"Simple Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert", 
"Constructed Response Manual", "Constructed Response Manual", 
"Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Expert", 
"Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
"Constructed Response Expert", "Constructed Response Manual", 
"Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice", 
"Constructed Response Manual", "Constructed Response Expert", 
"Complex Multiple Choice", "Complex Multiple Choice", "Constructed Response Expert", 
"Constructed Response Expert", "Constructed Response Manual", 
"Constructed Response Expert", "Constructed Response Manual", 
"Constructed Response Expert", "Constructed Response Expert", 
"Constructed Response Manual", "Constructed Response Expert", 
"Constructed Response Expert", "Simple Multiple Choice", "Simple Multiple Choice", 
"Constructed Response Manual", "Constructed Response Expert", 
"Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual", 
"Complex Multiple Choice", "Constructed Response Manual", "Constructed Response Manual", 
"Complex Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
"Simple Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice", 
"Constructed Response Expert", "Constructed Response Manual", 
"Constructed Response Manual", "Constructed Response Expert", 
"Constructed Response Manual", "Constructed Response Expert", 
"Simple Multiple Choice", "Constructed Response Manual", "Complex Multiple Choice"
)), row.names = c(NA, -109L), class = "data.frame")




# Lookup data: `df2` holds the four (Measures, Format) combinations whose
# occurrences in df1 are to be counted. The row names 1, 2, 4, 5 are kept
# exactly as in the original literal.
df2 <- structure(
  list(
    Measures = rep("space and shape", times = 4L),
    Format = c(
      "Constructed Response Expert",
      "Constructed Response Manual",
      "Simple Multiple Choice",
      "Constructed Response Auto-coded"
    )
  ),
  row.names = c(1L, 2L, 4L, 5L),
  class = "data.frame"
)

我用下面这段代码从第一个数据帧中筛选出同时存在于第二个数据帧中的所有行。

# Attach the tidyverse (provides dplyr's join verbs and the %>% pipe).
library(tidyverse)
# Keep the rows of df1 that have a match in df2. No `by` is given, so the
# join is performed on every column name the two data frames share.
df1 %>%
  inner_join(df2)

但我还想统计 df2 的每一行在第一个数据帧中出现的次数,并把这些计数存入一个向量。这用文字不太好解释,所以我录了一段短视频来说明:

https://youtu.be/gxIaNxSXSUM

我试着自己做,并创建了一个for循环来做这件事。这是我第一次真正使用循环,但它失败得很惨:

# Asker's attempt: for each row i of df2, record a count in num[i].
# NOTE(review): `num` is not created anywhere in the visible code before the
# loop; presumably it already exists in the session -- confirm.
for(i in 1:nrow(df2)) {  
# `b` is rebuilt on every iteration as a one-row, two-column data frame
# holding the literal strings 'NA' (not missing values).
b<- data.frame('NA','NA')
# Writes the join of df1 with the i-th row of df2 into row i of `b`.
b[i,]<- inner_join(df1, df2[i,])
# Stores the number of rows of `b`, not the number of rows returned by the
# join, so the value appears to follow the index i rather than the number of
# matches -- TODO confirm.
num[i]<- nrow(b)
}

当我打印 num 时,得到:

[1] 1 2 3 4 5 6 7 8

一种策略可以是在inner_join之前对df1中的组进行计数,而不是使用循环。一种方法可能看起来像这样:

library(dplyr)
# First tally every distinct combination of df1's columns, then keep only the
# combinations that also occur in df2 (joined on the shared column names).
inner_join(
  count(df1, across(everything())),
  df2
)
#> Joining, by = c("Measures", "Format")
#>          Measures                          Format n
#> 1 space and shape Constructed Response Auto-coded 1
#> 2 space and shape     Constructed Response Expert 2
#> 3 space and shape     Constructed Response Manual 1
#> 4 space and shape          Simple Multiple Choice 1

我选择了df1的一个子集,因为它相当大:

# Reduced six-row version of df1 used for this example: five
# "space and shape" rows followed by one "change and relationships" row.
df1 <- structure(
  list(
    Measures = rep(
      c("space and shape", "change and relationships"),
      times = c(5L, 1L)
    ),
    Format = c(
      "Constructed Response Expert", "Constructed Response Manual",
      "Constructed Response Expert", "Simple Multiple Choice",
      "Constructed Response Auto-coded", "Constructed Response Expert"
    )
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
# The four (Measures, Format) combinations to look up in df1; row names
# 1, 2, 4, 5 are preserved from the original data.
df2 <- structure(
  list(
    Measures = rep("space and shape", times = 4L),
    Format = c(
      "Constructed Response Expert", "Constructed Response Manual",
      "Simple Multiple Choice", "Constructed Response Auto-coded"
    )
  ),
  row.names = c(1L, 2L, 4L, 5L),
  class = "data.frame"
)

使用 dplyr 和 tidyr,这是您想要的结果吗?

library(dplyr)
library(tidyr)
# Left-join df2 onto df1 while keeping both sets of key columns (suffixed
# .x for df1 and .y for df2). df1 rows without a match get NA in the .y
# columns, so counting per .y key yields one row per matched df2 combination
# plus an NA group for the unmatched df1 rows.
df <- left_join(df1, df2, keep = TRUE) %>%
  group_by(Measures.y, Format.y) %>%
  summarise(n = n())
df
#> # A tibble: 5 x 3
#> # Groups:   Measures.y [2]
#>   Measures.y      Format.y                            n
#>   <chr>           <chr>                           <int>
#> 1 space and shape Constructed Response Auto-coded     2
#> 2 space and shape Constructed Response Expert         9
#> 3 space and shape Constructed Response Manual         6
#> 4 space and shape Simple Multiple Choice              6
#> 5 <NA>            <NA>                               86

由 reprex 包 (v2.0.1) 创建于 2021-11-26

根据后续评论补充:如果还想看到 df1 中在 df2 里没有匹配的组合,可以这样调整代码:

# Same join as before, but grouped on the df1 keys (.x) as well, so every
# df1 combination is listed. Combinations with no df2 match (NA in
# Measures.y) are reported with a count of 0 instead of their row count.
df <- left_join(df1, df2, keep = TRUE) %>%
  group_by(Measures.x, Format.x, Measures.y, Format.y) %>%
  summarise(n = n()) %>%
  mutate(n = if_else(is.na(Measures.y), 0L, n))

最新更新