在R中使用tidyverse(或任何其他包)按分位数(例如三分位数)对变量进行分组,还有哪些其他方法?



我有一个WVS(世界价值观调查)第六波的数据框,并计算了外群体信任指数(outgroup_index)。我想把这个向量按三分位数分成3组。

我使用基R函数来做:

# Recode the index into tertiles (three groups of roughly equal size).
# Tertile break points are the 0, 1/3, 2/3 and 1 quantiles of the index.
# na.rm = TRUE keeps quantile() from stopping with an error when the index
# contains NA/NaN values; such rows simply stay NA after cut().
tertiles <- quantile(
  filtered_df$outgroup_index,
  probs = 0:3 / 3,
  na.rm = TRUE
)
# Cut the target variable at the tertile breaks. include.lowest = TRUE puts
# the minimum value into the first interval instead of turning it into NA.
filtered_df$index_recoded <- cut(
  filtered_df$outgroup_index,
  breaks = tertiles,
  include.lowest = TRUE
)

但我想知道其他可能的和更整洁的方法来做到这一点(最好使用dplyr/tidyverse或任何其他包)?

数据:

structure(list(V2 = structure(c(643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643), label = "Country/region", format.spss = "F4.0", labels = c(`Not asked in survey` = -4, 
Algeria = 12, Azerbaijan = 31, Argentina = 32, Australia = 36, 
Armenia = 51, Brazil = 76, Belarus = 112, Chile = 152, China = 156, 
`Taiwan ROC` = 158, Colombia = 170, Cyprus = 196, Ecuador = 218, 
Estonia = 233, Georgia = 268, Palestine = 275, Germany = 276, 
Ghana = 288, Haiti = 332, `Hong Kong SAR` = 344, India = 356, 
Iraq = 368, Japan = 392, Kazakhstan = 398, Jordan = 400, `South Korea` = 410, 
Kuwait = 414, Kyrgyzstan = 417, Lebanon = 422, Libya = 434, Malaysia = 458, 
Mexico = 484, Morocco = 504, Netherlands = 528, `New Zealand` = 554, 
Nigeria = 566, Pakistan = 586, Peru = 604, Philippines = 608, 
Poland = 616, Qatar = 634, Romania = 642, Russia = 643, Rwanda = 646, 
Singapore = 702, Slovenia = 705, `South Africa` = 710, Zimbabwe = 716, 
Spain = 724, Sweden = 752, Thailand = 764, `Trinidad and Tobago` = 780, 
Tunisia = 788, Turkey = 792, Ukraine = 804, Egypt = 818, `United States` = 840, 
Uruguay = 858, Uzbekistan = 860, Yemen = 887), class = c("haven_labelled", 
"vctrs_vctr", "double")), V105 = structure(c(4, 3, 3, 4, 3, 4, 
4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 2, 2, 2, 1, 1, 
2, 4, 2, 2, 2, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 3, 2, 3, 2, 3, 
2, 2, 3, 3, 3, 3, 3, 3, NA, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 3, NA), label = "Trust: People you meet for the first time (B)", format.spss = "F3.0", labels = c(`SE:Inapplicable ; RU:Inappropriate response; HT: Dropped out` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V106 = structure(c(3, 2, NA, 4, 2, 4, 4, 3, 3, 4, 
3, 3, 4, 4, 4, 4, NA, NA, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 
3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 
2, 2, 1, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2, 
NA, 3, NA, 3, 3, 3, 2, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 2, 2, 2, 3, 2, 2, 2, 3), label = "Trust: People of another religion (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V107 = structure(c(3, 4, NA, 4, 2, 4, 4, 3, 3, 4, 
3, 3, 4, 4, 4, 4, 3, 2, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3, 
3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 
2, 1, 1, 2, 1, 4, 2, 1, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, NA, 
3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
2, 3, 2, 3, 2, 2, 2, 3), label = "Trust: People of another nationality (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V248 = structure(c(9, 8, 5, 8, 8, 8, 8, 9, 7, 9, 
9, 5, 5, 6, 5, 5, 5, 5, 5, 4, 9, 9, 4, 9, 9, 3, 6, 9, 8, 9, 9, 
9, NA, 9, 5, 9, 5, 7, 9, 5, 5, 9, 9, 8, 9, 9, 5, 5, 5, 9, 9, 
8, 5, 8, 9, 9, 5, 8, 9, 9, 9, 7, 7, 5, 4, 6, 9, 6, 6, 9, 9, 5, 
6, 7, 5, 4, 7, 7, 5, 5, 5, 5, 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 5, 
9, 9, 5, 9, 8, 9, 5, 5), label = "Highest educational level attained", format.spss = "F3.0", labels = c(`AU: Inapplicable (No-school education) DE,SE:Inapplicable ;` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1, 
`No formal education` = 1, `Incomplete primary school` = 2, `Complete primary school` = 3, 
`Incomplete secondary school: technical/ vocational type` = 4, 
`Complete secondary school: technical/ vocational type` = 5, 
`Incomplete secondary school: university-preparatory type` = 6, 
`Complete secondary school: university-preparatory type` = 7, 
`Some university-level education, without degree` = 8, `University - level education, with degree` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")), V59 = structure(c(9, 
5, 6, 8, 6, 7, NA, 8, 5, 3, 4, 7, 2, 1, 1, 6, 8, 6, NA, NA, 1, 
5, NA, 6, 1, 2, 9, 5, 6, NA, NA, 3, 6, 6, 4, NA, 6, 6, NA, NA, 
3, 9, 8, 10, 9, 6, 10, 9, 8, 9, 9, 10, 6, 4, 4, 6, 4, 10, 3, 
3, 4, 3, 5, 4, 7, 3, 3, 4, 3, 7, 4, 6, 4, 1, 1, 6, 1, 1, 6, 1, 
1, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 7, 3, 1, 5, 6, 7, 2, 4, 5
), label = "Satisfaction with financial situation of household", format.spss = "F3.0", labels = c(`HT: Dropped out survey;DE,SE:Inapplicable ; RU:Inappropriate` = -5, 
`Not asked` = -4, `No answer` = -2, `Don<U+00B4>t know` = -1, 
Dissatisfied = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, 
`7` = 7, `8` = 8, `9` = 9, Satisfied = 10), class = c("haven_labelled", 
"vctrs_vctr", "double")), V237 = structure(c(3, 2, 2, 2, NA, 
1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 4, 
2, 2, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 
1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 
2, 3, 2, 1, 2, 3, 2, 2, 2, NA, 2, 2, 4, 2, 2, 2, 1, 1, 2, 1, 
2, 3, 2, 2, 1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), label = "Family savings during past year", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; BH: Missing;` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U+00B4>t know` = -1, 
`Save money` = 1, `Just get by` = 2, `Spent some savings and borrowed money` = 3, 
`Spent savings and borrowed money` = 4), class = c("haven_labelled", 
"vctrs_vctr", "double")), V105_rec = c(1, 2, 2, 1, 2, 1, 1, 1, 
1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 4, 3, 1, 
3, 3, 3, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3, 
2, 2, 2, 2, 2, 2, NA, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 3, 3, 2, 2, 2, 3, 2, NA), V106_rec = c(2, 3, NA, 1, 3, 
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, NA, NA, NA, NA, 2, 3, 3, 3, 
3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 
3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 3, 3, 
2, 3, 2, 3, 3, NA, 2, NA, 2, 2, 2, 3, 2, 2, 1, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 3, 3, 3, 2), V107_rec = c(2, 
1, NA, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3, NA, NA, 2, 
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 
3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 3, 4, 2, 3, 2, 3, 
3, 3, 3, 2, 3, 2, 3, 3, NA, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 3, 2, 3, 3, 3, 2), outgroup_index = c(1.66666666666667, 
2, 2, 1, 2.66666666666667, 1, 1, 1.66666666666667, 1.66666666666667, 
1, 1.66666666666667, 2, 1, 1, 1, 1, 1.5, 2.5, 2, 2, 2, 3, 3, 
3, 3, 3, 2.66666666666667, 2, 2, 2, 2, 1.33333333333333, 1.33333333333333, 
2, 2, 2, 2, 2, 2, 2, 2, 2.66666666666667, 2, 3, 3, 3, 4, 4, 3, 
2.66666666666667, 3, 3, 3.66666666666667, 4, 3, 4, 1, 3, 4, 1.33333333333333, 
3, 2, 2.33333333333333, 3, 2.66666666666667, 3, 2, 3, 2, 3, 3, 
2, 2, 2.5, 2, 2, 2, 3, 2, 2, 1.33333333333333, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 3, 2.66666666666667, 2.66666666666667, 2, 
2.66666666666667, 3, 2.66666666666667, 2), V59_rec = structure(c(5, 
3, 3, 4, 3, 4, NA, 4, 3, 2, 2, 4, 1, 1, 1, 3, 4, 3, NA, NA, 1, 
3, NA, 3, 1, 1, 5, 3, 3, NA, NA, 2, 3, 3, 2, NA, 3, 3, NA, NA, 
2, 5, 4, 5, 5, 3, 5, 5, 4, 5, 5, 5, 3, 2, 2, 3, 2, 5, 2, 2, 2, 
2, 3, 2, 4, 2, 2, 2, 2, 4, 2, 3, 2, 1, 1, 3, 1, 1, 3, 1, 1, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 3, 3, 4, 1, 2, 3), labels = c(`Not satisfied at all` = 1, 
`Rather not satisfied` = 2, `Neither satisfied, nor not satisfied` = 3, 
`Rather satisfied` = 4, Satisfied = 5), class = c("haven_labelled", 
"vctrs_vctr", "double")), V248_dummy = structure(c(1, 1, 0, 1, 
1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0), labels = c(`A university education and higher` = 1, 
`No university education` = 0), class = c("haven_labelled", "vctrs_vctr", 
"double")), V237_rec = structure(c(3, 2, 2, 2, NA, 1, 2, 2, 1, 
2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 3, 2, 2, 1, NA, 
1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 1, 1, 1, 1, 
1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 2, 3, 2, 1, 
2, 3, 2, 2, 2, NA, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2, 
1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), labels = c(`Save money` = 1, 
`Just get by` = 2, `Spent savings and borrowed money` = 3), class = c("haven_labelled", 
"vctrs_vctr", "double"))), row.names = c(NA, -101L), class = c("tbl_df", 
"tbl", "data.frame"), label = "filelabel")

有点不直观,但是ggplot2具有您正在寻找的功能。

# cut_number() bins by quantiles (groups with approximately equal counts),
# which is what a tertile split is. cut_interval() would instead create
# equal-width ranges and only matches the quantile split when the tertiles
# happen to coincide with those range boundaries.
filtered_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3))

下面验证两种方法得到的因子水平是相同的:

# smaller dput would be nice
# NOTE(review): `Data` is presumably the tibble recreated from the
# structure(...) dump in the question -- confirm it is assigned before running.
start <- Data

# Base R reference: cut the index at its tertile (quantile) break points.
filtered_df <- start
tertiles <- quantile(filtered_df$outgroup_index, 0:3 / 3)
filtered_df$index_recoded <- cut(
  filtered_df$outgroup_index,
  tertiles,
  include.lowest = TRUE
)

# Tidyverse version: cut_number() bins by quantiles as well, so it is the
# equivalent of the base R code (cut_interval() makes equal-width bins and
# would only agree when the tertiles fall exactly on those boundaries).
tv_df <- start
tv_recoded <- tv_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3)) %>%
  pull(index_recoded)

# Every observation must land in the same interval under both approaches.
all(filtered_df$index_recoded == tv_recoded)
[1] TRUE

如果您想将数据划分为等宽的固定区间(而不是按分位数分组),则cut具有更简单的语法。

# A single number passed as `breaks` tells cut() to divide the range of the
# data into that many equal-length intervals (not equal-count groups).
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3
)

您也可以将它与labels = FALSE一起使用,以获得1,2和3作为输出。

# Same equal-width split into three intervals, but labels = FALSE makes cut()
# return plain integer codes (1, 2, 3) instead of a factor.
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3,
  labels = FALSE
)

最新更新