根据 r 中的平均值、最小值和最大值的向量创建数据帧



我正在尝试创建一个新的数据帧,其中包含现有数据帧 (top_SIM_weekly( 与另一个数据帧 (df_obs_weekly( 的平均值、最小值和最大值。

我包括了可重现的数据集:dput(top_SIM_weekly)

structure(list(X1 = c(18.3039606, 18.3039606, 18.3039606, 18.3039606, 
18.3039606, 18.3039606), X2 = c(30.73478281, 30.73478281, 30.73478281, 
30.73478281, 30.73478281, 30.73478281), X3 = c(50.88565679, 50.88565679, 
50.88565679, 50.88565679, 50.88565679, 50.88565679), X4 = c(35.40195165, 
35.40195165, 35.40195165, 35.40195165, 35.40195165, 35.40195165 ), X5 = c(23.12681287, 23.12681287, 23.12681287, 23.12681287, 
23.12681287, 23.12681287), X6 = c(41.35229054, 41.35229054, 41.35229054, 
41.35229054, 41.35229054, 41.35229054), X7 = c(67.48505048, 67.48505048, 
67.48505048, 67.48505048, 67.48505048, 67.48505048), X8 = c(53.04008984, 
53.04008984, 53.04008984, 53.04008984, 53.04008984, 53.04008984 ), X9 = c(58.26971959, 58.26971959, 58.26971959, 58.26971959, 
58.26971959, 58.26971959), X10 = c(111.1841342, 111.1841342, 
111.1841342, 111.1841342, 111.1841342, 111.1841342), X11 = c(100.7146155, 
100.7146155, 100.7146155, 100.7146155, 100.7146155, 100.7146155 ), X12 = c(224.9074216, 224.9074216, 224.9074216, 224.9074216, 
224.9074216, 224.9074216), X13 = c(255.2490203, 255.2490203, 
255.2490203, 255.2490203, 255.2490203, 255.2490203), X14 = c(404.1588161, 
404.1588161, 404.1588161, 404.1588161, 404.1588161, 404.1588161 ), X15 = c(443.9414483, 443.9414483, 443.9414483, 443.9414483, 
443.9414483, 443.9414483), X16 = c(886.3440158, 886.3440158, 
886.3440158, 886.3440158, 886.3440158, 886.3440158), X17 = c(1120.305186, 
1120.305186, 1120.305186, 1120.305186, 1120.305186, 1120.305186 ), X18 = c(1295.250279, 1295.250279, 1295.250279, 1295.250279, 
1295.250279, 1295.250279), X19 = c(1227.257398, 1227.257398, 
1227.257398, 1227.257398, 1227.257398, 1227.257398), X20 = c(462.4336653, 
462.4336653, 462.4336653, 462.4336653, 462.4336653, 462.4336653 ), X21 = c(533.812084, 533.812084, 533.812084, 533.812084, 533.812084, 
533.812084), X22 = c(-0.895926113, -0.895926113, -0.895926113, 
-0.895926113, -0.895926113, -0.895926113), X23 = c(0, 0, 0, 0,  0, 0), X24 = c(0.00587596, 0.00587596, 0.00587596, 0.00587596, 
0.00587596, 0.00587596), X25 = c(0, 0, 0, 0, 0, 0), X26 = c(0,  0, 0, 0, 0, 0), X27 = c(0, 0, 0, 0, 0, 0), X28 = c(0, 0, 0, 0,  0, 0), X29 = c(0, 0, 0, 0, 0, 0), X30 = c(0, 0, 0, 0, 0, 0), 
X31 = c(0, 0, 0, 0, 0, 0), X32 = c(0, 0, 0, 0, 0, 0), X33 = c(0, 
0, 0, 0, 0, 0), X34 = c(0, 0, 0, 0, 0, 0), X35 = c(0, 0, 
0, 0, 0, 0), X36 = c(0, 0, 0, 0, 0, 0), X37 = c(0, 0, 0, 
0, 0, 0), X38 = c(0, 0, 0, 0, 0, 0), X39 = c(0, 0, 0, 0, 
0, 0), X40 = c(0, 0, 0, 0, 0, 0), X41 = c(0.076302837, 0.076302837, 
0.076302837, 0.076302837, 0.076302837, 0.076302837), X42 = c(0.04497379, 
0.04497379, 0.04497379, 0.04497379, 0.04497379, 0.04497379
), X43 = c(0.47941265, 0.47941265, 0.47941265, 0.47941265, 
0.47941265, 0.47941265), X44 = c(1.70405395, 1.70405395, 
1.70405395, 1.70405395, 1.70405395, 1.70405395), X45 = c(1.069342585, 
1.069342585, 1.069342585, 1.069342585, 1.069342585, 1.069342585
), X46 = c(0.642559657, 0.642559657, 0.642559657, 0.642559657, 
0.642559657, 0.642559657), X47 = c(1.423669878, 1.423669878, 
1.423669878, 1.423669878, 1.423669878, 1.423669878), X48 = c(2.947726076, 
2.947726076, 2.947726076, 2.947726076, 2.947726076, 2.947726076
), X49 = c(2.083167482, 2.083167482, 2.083167482, 2.083167482, 
2.083167482, 2.083167482), X50 = c(3.001766438, 3.001766438, 
3.001766438, 3.001766438, 3.001766438, 3.001766438), X51 = c(4.02263428, 
4.02263428, 4.02263428, 4.02263428, 4.02263428, 4.02263428
), X52 = c(5.236041751, 5.236041751, 5.236041751, 5.236041751, 
5.236041751, 5.236041751), year = c(2018, 2018, 2018, 2018, 
2018, 2018), corr = c(0.598566601816163, 0.598566601816163, 
0.598566601816163, 0.598566601816163, 0.598566601816163, 
0.598566601816163)), row.names = 272:277, class = "data.frame")

dput(df_obs_weekly)

structure(list(Epiweek = structure(1L, .Label = "n", class = "factor"), 
X1 = 119L, X2 = 103L, X3 = 96L, X4 = 99L, X5 = 53L, X6 = 91L, 
X7 = 94L, X8 = 101L, X9 = 106L, X10 = 132L, X11 = 134L, X12 = 164L, 
X13 = 189L, X14 = 275L, X15 = 302L, X16 = 416L, X17 = 493L, 
X18 = 201L, X19 = NaN, X20 = NaN, X21 = 410L, X22 = 370L, 
X23 = 426L, X24 = 311L, X25 = 253L, X26 = 264L, X27 = 175L, 
X28 = 158L, X29 = 128L, X30 = 124L, X31 = 96L, X32 = 83L, 
X33 = 69L, X34 = 47L, X35 = 72L, X36 = 64L, X37 = 62L, X38 = 61L, 
X39 = 53L, X40 = 70L, X41 = 75L, X42 = 62L, X43 = 75L, X44 = 44L, 
X45 = 66L, X46 = 86L, X47 = 91L, X48 = 88L, X49 = 86L, X50 = 135L, 
X51 = 95L, X52 = 99L, year = 2018, corr = 1), row.names = 1L, class = "data.frame")

到目前为止,我尝试使用以下代码从平均值、最小值和最大值的向量创建一个矩阵:

mean <- top_SIM_weekly %>% summarise_if(is.numeric, mean)
max <- top_SIM_weekly %>% summarise_if(is.numeric, max)
min <- top_SIM_weekly %>% summarise_if(is.numeric, min)
y <- rbind(mean,max,min)
row.names(y) <- c("mean","max","min")

但我收到以下错误:Error: expecting a one sided formula, a function, or a function name.此错误以前没有出现过,所以我不确定发生了什么。我还想切换最终数据帧的行和列,并将第一行作为列名。

也许,我们需要

rbind(mean = x1, max = x2, min = x3)

meanmaxmin对象不是为rbind


由于"年份"和"corr"变量也是numeric,因此最好使用starts_with

library(dplyr)
library(tidyr)
top_SIM_weekly %>%
summarise(across(starts_with('X'),  ~
list(c(mean = mean(.), max = max(.), min = min(.))))) %>% 
unnest(everything())

另一种方法是selectnumeric变量,使用pivot_longer转向"long"格式,然后按meanminmax进行分组

library(dplyr) #1.0.0
library(tidyr)
library(tibble)
top_SIM_weekly %>%
select(where(is.numeric))  %>%
pivot_longer(cols = everything()) %>% 
group_by(name) %>% 
summarise(Mean = mean(value), Max = max(value), Min = min(value))%>% 
column_to_rownames('name') %>% 
t
c(mean, sum, min) %>% 
map_df(function(.x){
top_SIM_weekly %>% 
group_by(year,corr) %>% 
summarise_at(vars(starts_with('X')), .x) %>% 
return()
}) %>% 
bind_rows() 

您可以将map_df函数与function_names一起使用。 此代码不符合您的要求。但我相信这可以有所帮助

最新更新