R - 如何拆分数据框(df)并为拆分得到的新数据框命名(group_split 或 split.data.frame)



首先,我看到过几个类似的问题,但它们都没有说明如何指定拆分后各个数据框的名称。

我的df

# Example data for the question; this appears to be dput() output of a
# 10-row tibble (classes tbl_df / tbl / data.frame). The snippets further
# down refer to this object as `df`.
structure(list(paciente = structure(c(6163, 6553, 6357, 6331, 
6228, 6280, 6383, 6198, 6316, 6148), label = "Paciente", format.spss = "F6.0"), 
grupo_int_v00 = structure(c(1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 
1L, 2L), .Label = c("A", "B"), label = "Grupo de intervención", class = "factor"), 
time = structure(c(3L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("00", "01", "66"), class = "factor"), peso1 = c(76.3, 
95.4, 103.5, 82.1, 103.3, 77.6, 76, 88.3, 64, 101), cintura1 = c(104.5, 
120, 116, 104, 120.5, 104, NA, 110, 104, 119), tasis2_e = c(145, 
137, 123, 153, 131, 147, NA, 147, 121, 131), tadias2_e = c(64, 
61, 76, 75, 65, 84, NA, 76, 59, 96), p17_total = c(12, 3, 
9, 6, 8, 10, 9, 7, 12, 9), geaf_tot = c(1678.32, 1398.6, 
1566.43, 587.41, 4876.46, 3776.22, 1762.24, 3188.81, 7192.54, 
1678.32), glucosa = c(273, 149, 96, 115, 101, 94, NA, 125, 
104, 107), albumi = c(4.15, 4.75, 4.59, 4.83, 4.64, 4.49, 
NA, 4.71, 4.33, 4.09), coltot = c(137, 174, 252, 270, 211, 
164, NA, 192, 281, 234), hdl = c(30, 56, 45, 74, 66, 51, 
NA, 34, 62, 44), ldl_calc = c(51, 95, NA, 177, 127, 90, NA, 
130, 186, 170), trigli = c(280, 114, 309, 96, 89, 115, NA, 
139, 165, 99), hba1c = c(13.77, 6.57, 5.65, 6.52, 5.69, 6.02, 
NA, 6.25, 5.95, 5.93), i_hucpeptide = c(3567.05, 1407.53, 
1259.29, 1028.31, 649.19, 893.52, NA, 815.82, 342.68, NA), 
i_hughrelin = c(1214.83, 874.6, 1015.68, 919.51, 456.28, 
650.22, NA, 143.32, 1159.1, NA), i_hugip = c(2.67, 2.67, 
2.67, 2.67, 2.67, 2.67, NA, 2.67, 2.67, NA), i_huglp1 = c(538.62, 
264.67, 106.76, 164.82, 141.23, 14.14, NA, 112.57, 14.14, 
NA), i_huglucagon = c(720.19, 801.94, 321.68, 629.04, 186.88, 
238.33, NA, 238, 265.84, NA), i_huinsulin = c(1646.21, 545.57, 
297.96, 333.05, 232.17, 263.55, NA, 263.87, 136.97, NA), 
i_huleptin = c(8476.58, 10680.93, 6034.91, 14225.58, 2160.27, 
2778.49, NA, 2829.59, 6102.63, NA), i_hupai1 = c(3787.2, 
2401.66, 1040.35, 2123.09, 1625.27, 1932.06, NA, 2483.08, 
919.81, NA), i_huresistin = c(11350.35, 5171.75, 5794.31, 
2814.22, 2994.15, 3215.24, NA, 2577.84, 3227.73, NA), i_huvisfatin = c(1652.92, 
2125.95, 407.98, 3544.59, 8.64, 132.49, NA, 8.64, 189.96, 
NA), col_rema = c(56, 23, NA, 19, 18, 23, NA, 28, 33, 20), 
homa = c(19974.0146666667, 3612.88577777778, 1271.296, 1702.25555555556, 
1042.18533333333, 1101.05333333333, NA, 1465.94444444444, 
633.105777777778, NA), i_pcr = c(0.39, 0.57, 0.04, 0.22, 
0.04, 1.01, NA, 0.1, 0.04, NA), i_ratiolg = c(6.97758534115885, 
12.2123599359707, 5.94174346250788, 15.4708268534328, 4.73452704479705, 
4.273153701824, NA, 19.743162154619, 5.26497282374256, NA
)), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
))

df看起来像:

paciente grupo_int_v00 time  peso1 cintura1 tasis2_e tadias2_e
<dbl> <fct>         <fct> <dbl>    <dbl>    <dbl>     <dbl>
1     6163 A             66     76.3     104.      145        64
2     6553 A             01     95.4     120       137        61
3     6357 B             01    104.      116       123        76
4     6331 B             00     82.1     104       153        75
5     6228 A             01    103.      120.      131        65
6     6280 B             01     77.6     104       147        84
# Split df by grupo_int_v00 and copy every piece into the global
# environment as an object named after its group level.
list2env(split(df, df$grupo_int_v00), envir = globalenv())

这样生成的各个数据框是以 grupo_int_v00 的取值命名的。现在假设我想自己设置这些数据框的名称,该怎么做?这里我的 grupo_int_v00 取值是 A 或 B,但通常它们是数字,而我不希望数据框的名称是数字,所以希望它们被命名为 group_A、group_B、group_C……不确定能否提供 dplyr 的做法(group_split 加 set_names 或类似方法)。

谢谢

您可以使用 paste + set_names 来更改名称:

library(magrittr)

# Split by group, then prefix every element name with "group_".
split(df, df$grupo_int_v00) %>%
  set_names(paste0("group_", names(.)))

base R 的替代方案是:

# Split by group, then rebuild the list with "group_"-prefixed names.
spl <- split(df, df$grupo_int_v00)
spl <- setNames(spl, paste0("group_", names(spl)))

一个更短的替代方案是直接在 split 调用中使用 paste0:

# Build the prefixed group labels up front and split on them directly,
# so the resulting list is already named group_A, group_B, ...
split(df, paste0("group_", df$grupo_int_v00))

输出

> spl
$group_A
# A tibble: 6 × 30
paciente grupo_i…¹ time  peso1 cintu…² tasis…³ tadia…⁴ p17_t…⁵ geaf_…⁶ glucosa albumi coltot
<dbl> <fct>     <fct> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>
1     6163 A         66     76.3    104.     145      64      12   1678.     273   4.15    137
2     6553 A         01     95.4    120      137      61       3   1399.     149   4.75    174
3     6228 A         01    103.     120.     131      65       8   4876.     101   4.64    211
4     6383 A         01     76       NA       NA      NA       9   1762.      NA  NA        NA
5     6198 A         01     88.3    110      147      76       7   3189.     125   4.71    192
6     6316 A         01     64      104      121      59      12   7193.     104   4.33    281
# … with 18 more variables: hdl <dbl>, ldl_calc <dbl>, trigli <dbl>, hba1c <dbl>,
#   i_hucpeptide <dbl>, i_hughrelin <dbl>, i_hugip <dbl>, i_huglp1 <dbl>, i_huglucagon <dbl>,
#   i_huinsulin <dbl>, i_huleptin <dbl>, i_hupai1 <dbl>, i_huresistin <dbl>,
#   i_huvisfatin <dbl>, col_rema <dbl>, homa <dbl>, i_pcr <dbl>, i_ratiolg <dbl>, and
#   abbreviated variable names ¹​grupo_int_v00, ²​cintura1, ³​tasis2_e, ⁴​tadias2_e, ⁵​p17_total,
#   ⁶​geaf_tot
# ℹ Use `colnames()` to see all variable names
$group_B
# A tibble: 4 × 30
paciente grupo_i…¹ time  peso1 cintu…² tasis…³ tadia…⁴ p17_t…⁵ geaf_…⁶ glucosa albumi coltot
<dbl> <fct>     <fct> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>
1     6357 B         01    104.      116     123      76       9   1566.      96   4.59    252
2     6331 B         00     82.1     104     153      75       6    587.     115   4.83    270
3     6280 B         01     77.6     104     147      84      10   3776.      94   4.49    164
4     6148 B         01    101       119     131      96       9   1678.     107   4.09    234
# … with 18 more variables: hdl <dbl>, ldl_calc <dbl>, trigli <dbl>, hba1c <dbl>,
#   i_hucpeptide <dbl>, i_hughrelin <dbl>, i_hugip <dbl>, i_huglp1 <dbl>, i_huglucagon <dbl>,
#   i_huinsulin <dbl>, i_huleptin <dbl>, i_hupai1 <dbl>, i_huresistin <dbl>,
#   i_huvisfatin <dbl>, col_rema <dbl>, homa <dbl>, i_pcr <dbl>, i_ratiolg <dbl>, and
#   abbreviated variable names ¹​grupo_int_v00, ²​cintura1, ³​tasis2_e, ⁴​tadias2_e, ⁵​p17_total,
#   ⁶​geaf_tot
# ℹ Use `colnames()` to see all variable names

最新更新