我在R中有下表:
# Example data in long format: three rows for each of the series A, B, C.
# TS is deliberately kept as 1,1,1,2,2,2,3,3,3 (not 1,2,3 repeated).
S <- rep(c("A", "B", "C"), each = 3)
TS <- rep(c(1, 2, 3), each = 3)
f1 <- c(10, 20, 30, 15, 25, 35, 17, 27, 37)
p <- c(100, 200, 300, 150, 250, 350, 170, 270, 370)
df <- data.frame(S = S, TS = TS, f1 = f1, p = p)
看起来是这样的:
S | TS | f1 | p |
---|---|---|---|
A | 1 | 10 | 100 |
A | 2 | 20 | 200 |
A | 3 | 30 | 300 |
B | 1 | 15 | 150 |
B | 2 | 25 | 250 |
B | 3 | 35 | 350 |
C | 1 | 17 | 170 |
C | 2 | 27 | 270 |
C | 3 | 37 | 370 |
使用以下方法可以对任意数量的字符串执行此操作:
# group_by(), mutate(), select() and row_number() come from dplyr, so it
# has to be attached alongside tidyr (which supplies pivot_wider()).
library(dplyr)
library(tidyr)

# Widen f1: number the rows inside each TS group and use that index to
# build the new column names (SA_f1, SA_f2, ...).
res1 <- df %>%
  group_by(TS) %>%
  mutate(colnames = paste0("SA_f", row_number())) %>%
  ungroup() %>%
  select(-p) %>%
  pivot_wider(names_from = colnames, values_from = f1)

# Widen p the same way (p_1, p_2, ...).
res2 <- df %>%
  group_by(TS) %>%
  mutate(colnames = paste0("p_", row_number())) %>%
  ungroup() %>%
  select(-f1) %>%
  pivot_wider(names_from = colnames, values_from = p)

# Join the two wide tables on their shared key columns.
result <- merge(res1, res2, by = c("S", "TS"))
S TS SA_f1 SA_f2 SA_f3 p_1 p_2 p_3
1 A 1 10 20 30 100 200 300
2 B 2 15 25 35 150 250 350
3 C 3 17 27 37 170 270 370
如果您有100多个变量,我会把上述步骤放进 `for` 循环中,并迭代执行 `merge`。
# dplyr provides group_by(), mutate(), select() and row_number();
# tidyr provides pivot_wider().
library(dplyr)
library(tidyr)

# Names of the value columns to spread into wide format.
varlist <- c("f1", "p")

# One wide table per variable, stored under the variable's name.
reslist <- vector("list", length(varlist))
names(reslist) <- varlist

for (i in varlist) {
  # `i` holds a column name as a string, so it is wrapped in all_of()
  # wherever a column selection is expected.
  thisres <- df %>%
    group_by(TS) %>%
    mutate(colnames = paste(i, row_number(), sep = "_")) %>%
    ungroup() %>%
    select(S, TS, all_of(i), colnames) %>%
    pivot_wider(names_from = colnames, values_from = all_of(i))
  reslist[[i]] <- thisres
}

# Merge the per-variable tables one by one on their shared key columns.
# seq_along(reslist)[-1] is empty when there is only one variable, so the
# loop is skipped instead of indexing past the end of the list.
res <- reslist[[1]]
for (k in seq_along(reslist)[-1]) {
  res <- merge(res, reslist[[k]])
}
原始简化答案:
您可以使用:
library(dplyr)

# For every TS group, pick the 1st/2nd/3rd f1 and p values by position
# and expose each one as its own column.
summarise(
  group_by(df, TS),
  SA_f1 = f1[1],
  SB_f1 = f1[2],
  SC_f1 = f1[3],
  pA = p[1],
  pB = p[2],
  pC = p[3]
)
# A tibble: 3 x 7
TS SA_f1 SB_f1 SC_f1 pA pB pC
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 10 20 30 100 200 300
2 2 15 25 35 150 250 350
3 3 17 27 37 170 270 370
我认为,结果的差异来自于原始输入与显示内容的不同。
您的R代码中有一个错误,应该是
# Cycle 1, 2, 3 three times so that every S gets TS values 1..3.
# df has to be rebuilt from the vectors afterwards for this to take effect.
TS <- rep(1:3, times = 3)
无论如何,下面是一个非常优雅的解决方案:
library(reshape2)

# Stack f1 and p into variable/value pairs, keeping S and TS as keys.
long_df <- melt(df, id.vars = c("S", "TS"))

# One row per TS and one column per S/variable combination; mean() is the
# aggregator applied to the values that land in the same cell.
dcast(
  long_df,
  TS ~ S + variable,
  value.var = "value",
  fun.aggregate = mean
)
TS A_f1 A_p B_f1 B_p C_f1 C_p
1 1 10 100 15 150 17 170
2 2 20 200 25 250 27 270
3 3 30 300 35 350 37 370