r-当时间指示器不是从第1列开始时,如何将数据转换为long



我有以下数据,这让我头疼

编辑:更新了列名和数据,以更好地代表我的数据,因为我没有技能应用已经带来的第四个答案(对不起(

dat3 <- data.frame(
sex = c("F","F","F", "F","M","M","M", "M","TOT","TOT","TOT","TOT","F","F","F", "F","M","M","M", "M","TOT","TOT","TOT","TOT"),
age = c(rep("Y70-74",12),rep("Y75-79",12)),
geo = c("UK","GER","FRA", "POL","UK","GER","FRA", "POL","UK","GER","FRA","POL","UK","GER","FRA", "POL","UK","GER","FRA", "POL","UK","GER","FRA","POL"),
"2021W3" = c(5,3,7,5,8,2,6,5,13,5,13,10,  1,2,3,4,1,1,1,1,2,3,4,5),
"2021W2" = c(6,8,4,5,9,1,0,4,15,9,12,9,   1,2,3,4,1,1,1,1,2,3,4,5),
"2021W1" = c(8,7,9,2,1,2,3,6,9, 9,12,8 ,  1,2,3,4,1,1,1,1,2,3,4,5),
"2020W52"= c(1,2,8,2,5,1,2,4,6, 3,10,6,   1,2,3,4,1,1,1,1,2,3,4,5),
"2020W51"= c(4,4,3,6,4,5,1,0,9, 5,4, 6 ,  1,2,3,4,1,1,1,1,2,3,4,5),
"2020W50"= c(3,6,3,5,2,1,2,5,5, 7,5, 10,  1,2,3,4,1,1,1,1,2,3,4,5))

dat3
sex    age geo X2021W3 X2021W2 X2021W1 X2020W52 X2020W51 X2020W50
1    F Y70-74  UK       5       6       8        1        4        3
2    F Y70-74 GER       3       8       7        2        4        6
3    F Y70-74 FRA       7       4       9        8        3        3
4    F Y70-74 POL       5       5       2        2        6        5
5    M Y70-74  UK       8       9       1        5        4        2
6    M Y70-74 GER       2       1       2        1        5        1
7    M Y70-74 FRA       6       0       3        2        1        2
8    M Y70-74 POL       5       4       6        4        0        5
9  TOT Y70-74  UK      13      15       9        6        9        5
10 TOT Y70-74 GER       5       9       9        3        5        7
11 TOT Y70-74 FRA      13      12      12       10        4        5
12 TOT Y70-74 POL      10       9       8        6        6       10
13   F Y75-79  UK       1       1       1        1        1        1
14   F Y75-79 GER       2       2       2        2        2        2
15   F Y75-79 FRA       3       3       3        3        3        3
16   F Y75-79 POL       4       4       4        4        4        4
17   M Y75-79  UK       1       1       1        1        1        1
18   M Y75-79 GER       1       1       1        1        1        1
19   M Y75-79 FRA       1       1       1        1        1        1
20   M Y75-79 POL       1       1       1        1        1        1
21 TOT Y75-79  UK       2       2       2        2        2        2
22 TOT Y75-79 GER       3       3       3        3        3        3
23 TOT Y75-79 FRA       4       4       4        4        4        4
24 TOT Y75-79 POL       5       5       5        5        5        5



dat4 <- data.frame(
Time = rep(c("2020W50","2020W51","2020W52","20210W1","2021W2","2021W3"),4),
geo = c(rep("UK",6),rep("GER",6), rep("FRA",6),rep("POL",6)),
"FY70-74" = c(5,6,8,1,4,3,3,8,7,2,4,6,7,4,9,8,3,3,5,5,2,2,6,5),
"MY70-74" = c(8,9,1,5,4,2,2,1,2,1,5,1,6,0,3,2,1,2,5,4,6,4,0,5),
"TY70-74" = c(13,15,9,6,8,5,5,9,9,3,9,7,13,4,12,10,4,5,10,9,8,6,6,10),
"FY75-79" = c(1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4),
"MY75-74" = c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
"TY75-79" = c(2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,5,5,5,5,5,5))

>    dat4
Time geo FY70.74 MY70.74 TY70.74 FY75.79 MY75.74 TY75.79
1  2020W50  UK       5       8      13       1       1       2
2  2020W51  UK       6       9      15       1       1       2
3  2020W52  UK       8       1       9       1       1       2
4  20210W1  UK       1       5       6       1       1       2
5   2021W2  UK       4       4       8       1       1       2
6   2021W3  UK       3       2       5       1       1       2
7  2020W50 GER       3       2       5       2       1       3
8  2020W51 GER       8       1       9       2       1       3
9  2020W52 GER       7       2       9       2       1       3
10 20210W1 GER       2       1       3       2       1       3
11  2021W2 GER       4       5       9       2       1       3
12  2021W3 GER       6       1       7       2       1       3
13 2020W50 FRA       7       6      13       3       1       4
14 2020W51 FRA       4       0       4       3       1       4
15 2020W52 FRA       9       3      12       3       1       4
16 20210W1 FRA       8       2      10       3       1       4
17  2021W2 FRA       3       1       4       3       1       4
18  2021W3 FRA       3       2       5       3       1       4
19 2020W50 POL       5       5      10       4       1       5
20 2020W51 POL       5       4       9       4       1       5
21 2020W52 POL       2       6       8       4       1       5
22 20210W1 POL       2       4       6       4       1       5
23  2021W2 POL       6       0       6       4       1       5
24  2021W3 POL       5       5      10       4       1       5
> 

编辑:我非常抱歉,我无法想象我想要我的数据集,但我现在知道了——我想要三个性别类别;女性";,男性"总计";分布在所有年龄段:

"LT5 Y5-9 Y14-19 Y20-24 Y25-29 Y30-34 Y35-39 Y40-44 Y45-49 Y50-54 Y55-59 Y60-64 Y65-69 Y70-74 Y75-79 Y80-84 Y85-89 OT90〃;

因此,在每个年龄段内:女性+男性=总

注意:我的TIME索引必须从第一个日期-2020W50开始,所以我需要在melt之前以某种方式翻转列的顺序

在我的例子中,数字和日期现在混淆了。我想要正确的日期顺序(从第一到最后,当然要附上正确的数字(

我也不知道为什么会有";X’es";在我的列名后面。同样地,R改变了";70-74〃;至";70.74〃;。不知道为什么。

如果你喜欢data.table包,这里有一种方法:

library(data.table)
setDT(datWide)
datLong <- datWide[, melt(.SD , id.vars = c("sex", "age", "geo"), variable.name = "Time")
][, dcast(.SD, age + geo + Time ~ sex, value.var = "value")]
datLong

样本输出:

age geo  Time F M TOT
1: Y70-74 FRA Week1 7 6  13
2: Y70-74 FRA Week2 4 0  12
3: Y70-74 FRA Week3 9 3  12
4: Y70-74 FRA Week4 8 2  10
5: Y70-74 FRA Week5 3 1   4

使用这种方法,无论您有多少周的时间,都只能指定您的id.vars

library(tidyr)
library(stringr)
n <- 1129 # change this to the number accordingly   
names_list <- as.character()
names_list[1:n] <- str_c("Week",seq(1:n)) %>% unlist()
datLong <- datWide %>% 
pivot_longer(cols = names_list,
names_to = "Time") %>%
pivot_wider(names_from = "sex")
# A tibble: 24 x 6
age    geo   Time      F     M   TOT
<chr>  <chr> <chr> <dbl> <dbl> <dbl>
1 Y70-74 UK    Week1     5     8    13
2 Y70-74 UK    Week2     6     9    15
3 Y70-74 UK    Week3     8     1     9
4 Y70-74 UK    Week4     1     5     6
5 Y70-74 UK    Week5     4     4     9
6 Y70-74 UK    Week6     3     2     5
7 Y70-74 GER   Week1     3     2     5
8 Y70-74 GER   Week2     8     1     9
9 Y70-74 GER   Week3     7     2     9
10 Y70-74 GER   Week4     2     1     3
# ... with 14 more rows

更新:R将X放在日期列之前,因为R变量"应该"以字符而非数字开头。列名正在更改,因为-字符是非标准的对象名字符。要解决这两个问题,请将check.names=FALSE添加到数据帧构造函数的末尾:

dat3 <- data.frame(
sex = c("F","F","F", "F","M","M","M", "M","TOT","TOT","TOT","TOT","F","F","F", "F","M","M","M", "M","TOT","TOT","TOT","TOT"),
age = c(rep("Y70-74",12),rep("Y75-79",12)),
geo = c("UK","GER","FRA", "POL","UK","GER","FRA", "POL","UK","GER","FRA","POL","UK","GER","FRA", "POL","UK","GER","FRA", "POL","UK","GER","FRA","POL"),
"2021W3" = c(5,3,7,5,8,2,6,5,13,5,13,10,  1,2,3,4,1,1,1,1,2,3,4,5),
"2021W2" = c(6,8,4,5,9,1,0,4,15,9,12,9,   1,2,3,4,1,1,1,1,2,3,4,5),
"2021W1" = c(8,7,9,2,1,2,3,6,9, 9,12,8 ,  1,2,3,4,1,1,1,1,2,3,4,5),
"2020W52"= c(1,2,8,2,5,1,2,4,6, 3,10,6,   1,2,3,4,1,1,1,1,2,3,4,5),
"2020W51"= c(4,4,3,6,4,5,1,0,9, 5,4, 6 ,  1,2,3,4,1,1,1,1,2,3,4,5),
"2020W50"= c(3,6,3,5,2,1,2,5,5, 7,5, 10,  1,2,3,4,1,1,1,1,2,3,4,5),
check.names = FALSE)
dat3
sex    age geo  2021W3  2021W2  2021W1  2020W52  2020W51  2020W50
1    F Y70-74  UK       5       6       8        1        4        3
2    F Y70-74 GER       3       8       7        2        4        6
3    F Y70-74 FRA       7       4       9        8        3        3
4    F Y70-74 POL       5       5       2        2        6        5
5    M Y70-74  UK       8       9       1        5        4        2
6    M Y70-74 GER       2       1       2        1        5        1
7    M Y70-74 FRA       6       0       3        2        1        2
8    M Y70-74 POL       5       4       6        4        0        5
9  TOT Y70-74  UK      13      15       9        6        9        5
10 TOT Y70-74 GER       5       9       9        3        5        7
11 TOT Y70-74 FRA      13      12      12       10        4        5
12 TOT Y70-74 POL      10       9       8        6        6       10
13   F Y75-79  UK       1       1       1        1        1        1

这允许您以数字开始列名,但不建议使用,应尽可能避免使用。

要将格式从dat3更改为dat4,您可以使用:

library(dplyr)
names_list <- colnames(dat3[4:length(dat3)])
datNew <- dat3 %>% 
pivot_longer(cols = names_list, names_to = "Time") %>% 
pivot_wider(names_from = c("sex","age"), )
datNew <- arrange(datNew, desc("Time"))
datNew
# A tibble: 24 x 8
geo   Time   `F_Y70-74` `M_Y70-74` `TOT_Y70-74` `F_Y75-79` `M_Y75-79` `TOT_Y75-79`
<chr> <chr>       <dbl>      <dbl>        <dbl>      <dbl>      <dbl>        <dbl>
1 UK    2021W3          5          8           13          1          1            2
2 GER   2021W3          3          2            5          2          1            3
3 FRA   2021W3          7          6           13          3          1            4
4 POL   2021W3          5          5           10          4          1            5
5 UK    2021W2          6          9           15          1          1            2
6 GER   2021W2          8          1            9          2          1            3
7 FRA   2021W2          4          0           12          3          1            4
8 POL   2021W2          5          4            9          4          1            5
9 UK    2021W1          8          1            9          1          1            2
10 GER   2021W1          7          2            9          2          1            3
# ... with 14 more rows

其将创建相关列并按日期降序排列行。

由于R将"-"字符解释为减号,因此必须使用引号来调用这些列。

> datNew$`F_Y70-74`
[1] 5 6 8 1 4 3 3 8 7 2 4 6 7 4 9 8 3 3 5 5 2 2 6 5
> datNew$F_Y70-74
numeric(0)
Warning message:
Unknown or uninitialised column: `F_Y70`.

我建议将这些更改为下划线。

使用dcast+meltdata.table选项

dcast(
melt(setDT(datWide),
id.var = 1:3,
variable.name = "Time"
),
Time + geo + age ~ sex
)[order(-geo, Time)]

给出

Time geo    age F M TOT
1: Week1  UK Y70-74 5 8  13
2: Week2  UK Y70-74 6 9  15
3: Week3  UK Y70-74 8 1   9
4: Week4  UK Y70-74 1 5   6
5: Week5  UK Y70-74 4 4   9
6: Week6  UK Y70-74 3 2   5
7: Week1 POL Y70-74 5 5  10
8: Week2 POL Y70-74 5 4   9
9: Week3 POL Y70-74 2 6   8
10: Week4 POL Y70-74 2 4   6
11: Week5 POL Y70-74 6 0   6
12: Week6 POL Y70-74 5 5  10
13: Week1 GER Y70-74 3 2   5
14: Week2 GER Y70-74 8 1   9
15: Week3 GER Y70-74 7 2   9
16: Week4 GER Y70-74 2 1   3
17: Week5 GER Y70-74 4 5   5
18: Week6 GER Y70-74 6 1   7
19: Week1 FRA Y70-74 7 6  13
20: Week2 FRA Y70-74 4 0  12
21: Week3 FRA Y70-74 9 3  12
22: Week4 FRA Y70-74 8 2  10
23: Week5 FRA Y70-74 3 1   4
24: Week6 FRA Y70-74 3 2   5
Time geo    age F M TOT

相关内容

最新更新