R:使用 dplyr 的 mutate() 添加一列,在每个分组内再对行进行子分组



下面是我的最小示例(后文的代码假定它已赋值给 `df`):它是我的书籍数据集的简化版本,每一行对应一个单词。

structure(list(word = c("in", "großer", "erregung", "umstehen", 
"bauersleute", "knechte", "und", "mägde", "das", "gehöft", 
"des", "servaz", "amareller", "bauers", "im", "hemmernmoos", 
"und", "besprechen", "den", "einleitung", "lieber", "leser", 
"weißt", "du", "was", "das", "wort", "greenhorn", "bedeutet", 
"eine", "höchst", "ärgerliche", "und", "despektierliche", "bezeichnung", 
"für", "denjenigen", "auf", "zum", "alm", "öhi", "hinauf", 
"vom", "freundlichen", "dorfe", "maienfeld", "führt", "ein", 
"fußweg", "durch", "grüne", "baumreiche", "fluren", "bis", 
"zum", "fuße", "der"), word_id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L), book = c("bergrichters", 
"bergrichters", "bergrichters", "bergrichters", "bergrichters", 
"bergrichters", "bergrichters", "bergrichters", "bergrichters", 
"bergrichters", "bergrichters", "bergrichters", "bergrichters", 
"bergrichters", "bergrichters", "bergrichters", "bergrichters", 
"bergrichters", "bergrichters", "winnetou", "winnetou", "winnetou", 
"winnetou", "winnetou", "winnetou", "winnetou", "winnetou", "winnetou", 
"winnetou", "winnetou", "winnetou", "winnetou", "winnetou", "winnetou", 
"winnetou", "winnetou", "winnetou", "winnetou", "heidilehr", 
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr", 
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr", 
"heidilehr", "heidilehr", "heidilehr", "heidilehr", "heidilehr", 
"heidilehr", "heidilehr", "heidilehr")), row.names = c(NA, -57L
), groups = structure(list(word = c("alm", "amareller", "ärgerliche", 
"auf", "bauers", "bauersleute", "baumreiche", "bedeutet", "besprechen", 
"bezeichnung", "bis", "das", "den", "denjenigen", "der", "des", 
"despektierliche", "dorfe", "du", "durch", "ein", "eine", "einleitung", 
"erregung", "fluren", "freundlichen", "führt", "für", "fuße", 
"fußweg", "gehöft", "greenhorn", "großer", "grüne", "hemmernmoos", 
"hinauf", "höchst", "im", "in", "knechte", "leser", "lieber", 
"mägde", "maienfeld", "öhi", "servaz", "umstehen", "und", "vom", 
"was", "weißt", "wort", "zum"), .rows = structure(list(40L, 
13L, 32L, 38L, 14L, 5L, 52L, 29L, 18L, 35L, 54L, c(9L, 26L
), 19L, 37L, 57L, 11L, 34L, 45L, 24L, 50L, 48L, 30L, 20L, 
3L, 53L, 44L, 47L, 36L, 56L, 49L, 10L, 28L, 2L, 51L, 16L, 
42L, 31L, 15L, 1L, 6L, 22L, 21L, 8L, 46L, 41L, 12L, 4L, c(7L, 
17L, 33L), 43L, 25L, 23L, 27L, c(39L, 55L)), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 53L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

因为这些书没有章节,为了便于分析,我想插入一个"假"的章节列,命名为 `section`:在每本书内部(`group_by(book)`),根据该书的实际行数按比例把它拆分为 10 个部分(即每本书 10 个章节),并按 1 到 10 的顺序给这些章节编号。

我没能找到用 dplyr 实现的办法,也不清楚该如何着手。有什么建议吗?谢谢!

一种方法是用 `cut()` 把每本书(`book`)内的行号等宽地切分为 10 个区间,区间的序号即为章节号。

library(dplyr)

# Add a pseudo-chapter column `section` (1-10) to every book.
# group_by(book) replaces whatever grouping `df` already carries, so
# row_number() restarts at 1 for each book. cut(..., breaks = 10,
# labels = FALSE) splits that range of row positions into 10 equal-width
# intervals and returns the interval index as an integer.
# ungroup() drops the grouping afterwards so that later verbs are not
# silently evaluated per book.
df %>%
  group_by(book) %>%
  mutate(section = cut(row_number(), breaks = 10, labels = FALSE)) %>%
  ungroup()
#   word        word_id book         section
#   <chr>         <int> <chr>          <int>
# 1 in                1 bergrichters       1
# 2 großer            2 bergrichters       1
# 3 erregung          3 bergrichters       2
# 4 umstehen          4 bergrichters       2
# 5 bauersleute       5 bergrichters       3
# 6 knechte           6 bergrichters       3
# 7 und               7 bergrichters       4
# 8 mägde             8 bergrichters       4
# 9 das               9 bergrichters       5
#10 gehöft           10 bergrichters       5
# … with 47 more rows

也可以只用基础 R,借助 `ave()` 来完成:

# Base R variant: ave() applies the function to the `word_id` values of each
# `book` separately and puts the results back in the original row order.
# Only the length of each per-book vector is used: its positions are binned
# into 10 equal-width intervals and the interval index becomes `section`.
df$section <- ave(
  df$word_id,
  df$book,
  FUN = function(ids) cut(seq_along(ids), breaks = 10, labels = FALSE)
)

最新更新