假设我有以下数据帧:
dc tmin tmax cint wcmin wcmax wsmin wsmax gsmin gsmax wd rmin rmax cir lr
1: 24 -1 4 5 -5 -2 20 25 35 40 90 11.8 26.6 14.8 3
2: 41 -3 5 8 -8 -3 15 20 35 40 90 10.0 23.5 13.5 3
3: 48 0 5 5 -4 0 30 35 45 50 45 7.3 19.0 11.7 6
4: 50 0 5 5 -4 0 30 35 45 50 45 7.3 19.0 11.7 6
5: 52 3 5 2 -3 1 20 25 35 40 45 6.7 17.4 10.7 6
6: 57 -2 5 7 -6 -1 25 30 35 40 315 4.4 13.8 9.4 7
lc wc li yd yr nF factdcx
1: 1 3 TRUE 1 2010 2 24
2: 1 3 TRUE 1 2010 8 41
3: 2 3 TRUE 1 2010 0 48
4: 2 3 TRUE 1 2010 0 50
5: 2 3 TRUE 1 2010 0 52
6: 3 3 FALSE 1 2010 0 57
我想将其转换为如下所示的新数据帧:
dc tmin tmax cint wcmin wcmax wsmin wsmax gsmin gsmax wd rmin rmax cir lr
1: 24 -1 4 5 -5 -2 20 25 35 40 90 11.8 26.6 14.8 3
2: 41 -3 5 8 -8 -3 15 20 35 40 90 10.0 23.5 13.5 3
3: 48 0 5 5 -4 0 30 35 45 50 45 7.3 19.0 11.7 6
4: 52 3 5 2 -3 1 20 25 35 40 45 6.7 17.4 10.7 6
5: 57 -2 5 7 -6 -1 25 30 35 40 315 4.4 13.8 9.4 7
lc wc li yd yr nF factdcx
1: 1 3 TRUE 1 2010 2 24
2: 1 3 TRUE 1 2010 8 41
3: 2 3 TRUE 1 2010 0 (sum of nF for 48 and 50, factdcx) 48
4: 2 3 TRUE 1 2010 0 52
5: 3 3 FALSE 1 2010 0 57
我该怎么做?(当然,数据帧 abc 要大得多,但我想要所有类别 48 和 50 的总和,并将其分组到一个新类别中,比如"48")。
非常感谢!
> dput(head(abc1))
structure(list(dc = c(24L, 41L, 48L, 50L, 52L, 57L), tmin = c(-1L,
-3L, 0L, 0L, 3L, -2L), tmax = c(4L, 5L, 5L, 5L, 5L, 5L), cint = c(5L,
8L, 5L, 5L, 2L, 7L), wcmin = c(-5L, -8L, -4L, -4L, -3L, -6L),
wcmax = c(-2L, -3L, 0L, 0L, 1L, -1L), wsmin = c(20L, 15L,
30L, 30L, 20L, 25L), wsmax = c(25L, 20L, 35L, 35L, 25L, 30L
), gsmin = c(35L, 35L, 45L, 45L, 35L, 35L), gsmax = c(40L,
40L, 50L, 50L, 40L, 40L), wd = c(90L, 90L, 45L, 45L, 45L,
315L), rmin = c(11.8, 10, 7.3, 7.3, 6.7, 4.4), rmax = c(26.6,
23.5, 19, 19, 17.4, 13.8), cir = c(14.8, 13.5, 11.7, 11.7,
10.7, 9.4), lr = c(3L, 3L, 6L, 6L, 6L, 7L), lc = c(1L, 1L,
2L, 2L, 2L, 3L), wc = c(3L, 3L, 3L, 3L, 3L, 3L), li = c(TRUE,
TRUE, TRUE, TRUE, TRUE, FALSE), yd = c(1L, 1L, 1L, 1L, 1L,
1L), yr = c(2010L, 2010L, 2010L, 2010L, 2010L, 2010L), nF = c(2L,
8L, 0L, 0L, 0L, 0L), factdcx = structure(1:6, .Label = c("24",
"41", "48", "50", "52", "57", "70"), class = "factor")), .Names = c("dc",
"tmin", "tmax", "cint", "wcmin", "wcmax", "wsmin", "wsmax", "gsmin",
"gsmax", "wd", "rmin", "rmax", "cir", "lr", "lc", "wc", "li",
"yd", "yr", "nF", "factdcx"), class = c("data.table", "data.frame"
), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x054b24a0>)
仍然有问题,先生/女士:
> head(abc1 (updated))
dc tmin tmax cint wcmin wcmax wsmin wsmax gsmin gsmax wd rmin rmax cir lr
1: 24 -1 4 5 -5 -2 20 25 35 40 90 11.8 26.6 14.8 3
2: 41 -3 5 8 -8 -3 15 20 35 40 90 10.0 23.5 13.5 3
3: 48 0 5 5 -4 0 30 35 45 50 45 7.3 19.0 11.7 6
4: 52 3 5 2 -3 1 20 25 35 40 45 6.7 17.4 10.7 6
5: 57 -2 5 7 -6 -1 25 30 35 40 315 4.4 13.8 9.4 7
6: 70 -2 3 5 -4 -1 20 25 30 35 360 3.6 10.2 6.6 7
lc wc li yd yr nF factdcx
1: 1 3 TRUE 1 2010 2 24
2: 1 3 TRUE 1 2010 8 41
3: 2 3 TRUE 1 2010 57 48
4: 2 3 TRUE 1 2010 0 52
5: 3 3 FALSE 1 2010 0 57
6: 3 2 TRUE 1 2010 1 70
nF 的总和不正确,它应该是零。
尝试
library(data.table)
unique(setDT(df1)[, factdcx:= as.character(factdcx)][factdcx %chin%
c('48','50'), c('dc', 'factdcx', 'nF') := list('48', '48', sum(nF))])
# dc tmin tmax cint wcmin wcmax wsmin wsmax gsmin gsmax wd rmin rmax cir lr
#1: 24 -1 4 5 -5 -2 20 25 35 40 90 11.8 26.6 14.8 3
#2: 41 -3 5 8 -8 -3 15 20 35 40 90 10.0 23.5 13.5 3
#3: 48 0 5 5 -4 0 30 35 45 50 45 7.3 19.0 11.7 6
#4: 52 3 5 2 -3 1 20 25 35 40 45 6.7 17.4 10.7 6
#5: 57 -2 5 7 -6 -1 25 30 35 40 315 4.4 13.8 9.4 7
# lc wc li yd yr nF factdcx
#1: 1 3 TRUE 1 2010 2 24
#2: 1 3 TRUE 1 2010 8 41
#3: 2 3 TRUE 1 2010 0 48
#4: 2 3 TRUE 1 2010 0 52
#5: 3 3 FALSE 1 2010 0 57
对于abc1
,
res1 <- unique(setDT(abc1)[, factdcx:= as.character(factdcx)][factdcx %chin%
c('48','50'), c('dc', 'factdcx', 'nF') := list(48, '48', sum(nF))])
res1
# dc tmin tmax cint wcmin wcmax wsmin wsmax gsmin gsmax wd rmin rmax cir lr
#1: 24 -1 4 5 -5 -2 20 25 35 40 90 11.8 26.6 14.8 3
#2: 41 -3 5 8 -8 -3 15 20 35 40 90 10.0 23.5 13.5 3
#3: 48 0 5 5 -4 0 30 35 45 50 45 7.3 19.0 11.7 6
#4: 52 3 5 2 -3 1 20 25 35 40 45 6.7 17.4 10.7 6
#5: 57 -2 5 7 -6 -1 25 30 35 40 315 4.4 13.8 9.4 7
# lc wc li yd yr nF factdcx
#1: 1 3 TRUE 1 2010 2 24
#2: 1 3 TRUE 1 2010 8 41
#3: 2 3 TRUE 1 2010 0 48
#4: 2 3 TRUE 1 2010 0 52
#5: 3 3 FALSE 1 2010 0 57
数据
df1 <- structure(list(dc = structure(1:6, .Label = c("24", "41",
"48",
"50", "52", "57"), class = "factor"), tmin = c(-1L, -3L, 0L,
0L, 3L, -2L), tmax = c(4L, 5L, 5L, 5L, 5L, 5L), cint = c(5L,
8L, 5L, 5L, 2L, 7L), wcmin = c(-5L, -8L, -4L, -4L, -3L, -6L),
wcmax = c(-2L, -3L, 0L, 0L, 1L, -1L), wsmin = c(20L, 15L,
30L, 30L, 20L, 25L), wsmax = c(25L, 20L, 35L, 35L, 25L, 30L
), gsmin = c(35L, 35L, 45L, 45L, 35L, 35L), gsmax = c(40L,
40L, 50L, 50L, 40L, 40L), wd = c(90L, 90L, 45L, 45L, 45L,
315L), rmin = c(11.8, 10, 7.3, 7.3, 6.7, 4.4), rmax = c(26.6,
23.5, 19, 19, 17.4, 13.8), cir = c(14.8, 13.5, 11.7, 11.7,
10.7, 9.4), lr = c(3L, 3L, 6L, 6L, 6L, 7L), lc = c(1L, 1L,
2L, 2L, 2L, 3L), wc = c(3L, 3L, 3L, 3L, 3L, 3L), li = c(TRUE,
TRUE, TRUE, TRUE, TRUE, FALSE), yd = c(1L, 1L, 1L, 1L, 1L,
1L), yr = c(2010L, 2010L, 2010L, 2010L, 2010L, 2010L), nF = c(2L,
8L, 0L, 0L, 0L, 0L), factdcx = structure(1:6, .Label = c("24",
"41", "48", "50", "52", "57"), class = "factor")), .Names = c("dc",
"tmin", "tmax", "cint", "wcmin", "wcmax", "wsmin", "wsmax", "gsmin",
"gsmax", "wd", "rmin", "rmax", "cir", "lr", "lc", "wc", "li",
"yd", "yr", "nF", "factdcx"), row.names = c("1:", "2:", "3:",
"4:", "5:", "6:"), class = "data.frame")