R - 计算数据框中各取值组合所占的比例



我有一个数据框,它的结构与下面这个玩具数据集类似。我不想直接处理这些二进制列,而是希望生成新的列,用来表示二进制数据列(value1、value2和value3)之间的取值组合,因为只有8种可能的值组合(例如,TRUE.TRUE.TRUE、TRUE.TRUE.FALSE等)。

具体来说,我想针对每个 subject 和 object 的组合,计算各种取值组合所占的比例。

subject     object      value1     value2   value3 
1           A           TRUE       TRUE     FALSE
1           A           TRUE       TRUE     TRUE
1           B           TRUE       FALSE    TRUE
1           B           TRUE       FALSE    TRUE
1           B           TRUE       TRUE     TRUE
2           B           TRUE       FALSE    FALSE
2           A           TRUE       TRUE     FALSE
2           B           FALSE      FALSE    FALSE
3           A           TRUE       TRUE     FALSE
3           B           FALSE      TRUE     FALSE
3           B           TRUE       TRUE     TRUE
...         ...         ...        ...      ...

期望输出:

subject     object      combination        value    
1           A           True.True.True    .5                 
1           A           True.True.False   .5   
1           B           True.True.True    .33
1           B           True.False.True   .67              
...
etc for subject 2 and 3... 

你可以做:

# For every subject/object group, label each row with its value combination
# and attach the share of the group's rows that carry the same label.
# One output row per input row; the result stays grouped by subject and object.
df %>%
  group_by(subject, object) %>%
  mutate(
    # Combination label such as "TRUE.FALSE.TRUE"
    val = str_c(value1, value2, value3, sep = "."),
    # Per-group proportion table, looked up by each row's own label
    value = c(prop.table(table(val))[val])
  )
# A tibble: 11 x 7
# Groups:   subject, object [6]
subject object value1 value2 value3 val               value
<int> <chr>  <lgl>  <lgl>  <lgl>  <chr>             <dbl>
1       1 A      TRUE   TRUE   FALSE  TRUE.TRUE.FALSE   0.5  
2       1 A      TRUE   TRUE   TRUE   TRUE.TRUE.TRUE    0.5  
3       1 B      TRUE   FALSE  TRUE   TRUE.FALSE.TRUE   0.667
4       1 B      TRUE   FALSE  TRUE   TRUE.FALSE.TRUE   0.667
5       1 B      TRUE   TRUE   TRUE   TRUE.TRUE.TRUE    0.333
6       2 B      TRUE   FALSE  FALSE  TRUE.FALSE.FALSE  0.5  
7       2 A      TRUE   TRUE   FALSE  TRUE.TRUE.FALSE   1    
8       2 B      FALSE  FALSE  FALSE  FALSE.FALSE.FALSE 0.5  
9       3 A      TRUE   TRUE   FALSE  TRUE.TRUE.FALSE   1    
10       3 B      FALSE  TRUE   FALSE  FALSE.TRUE.FALSE  0.5  
11       3 B      TRUE   TRUE   TRUE   TRUE.TRUE.TRUE    0.5  

试试这个:

library(tidyverse)
# Data: the 11-row toy data set from the question.
# stringsAsFactors = FALSE keeps `object` a character column on any R version.
df <- data.frame(
  subject = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
  object = c("A", "A", "B", "B", "B", "B", "A", "B", "A", "B", "B"),
  value1 = c(
    TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE
  ),
  value2 = c(
    TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE
  ),
  value3 = c(
    FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE
  ),
  stringsAsFactors = FALSE
)
# Code: share of each value combination within every subject/object pair.
# Produces one row per distinct (subject, object, combination) with its count
# N and proportion Prop; the result is left grouped by subject and object.
df %>%
  # Collapse the three logical columns into one label, e.g. "TRUE.FALSE.TRUE"
  mutate(value = paste(value1, value2, value3, sep = ".")) %>%
  # count() tallies rows per subject/object/label in a single step, replacing
  # the group_by() + summarize(n()) + ungroup() round-trip and avoiding
  # dplyr's "summarise() has grouped output" message
  count(subject, object, value, name = "N") %>%
  # Proportions are taken relative to each subject/object total
  group_by(subject, object) %>%
  mutate(Prop = N / sum(N))
# A tibble: 10 x 5
# Groups:   subject, object [6]
subject object value                 N  Prop
<int> <chr>  <chr>             <int> <dbl>
1       1 A      TRUE.TRUE.FALSE       1 0.5  
2       1 A      TRUE.TRUE.TRUE        1 0.5  
3       1 B      TRUE.FALSE.TRUE       2 0.667
4       1 B      TRUE.TRUE.TRUE        1 0.333
5       2 A      TRUE.TRUE.FALSE       1 1    
6       2 B      FALSE.FALSE.FALSE     1 0.5  
7       2 B      TRUE.FALSE.FALSE      1 0.5  
8       3 A      TRUE.TRUE.FALSE       1 1    
9       3 B      FALSE.TRUE.FALSE      1 0.5  
10       3 B      TRUE.TRUE.TRUE        1 0.5  

最新更新