##### Code to generate the sample DF
# Column-bind objects that have different numbers of rows, padding the
# shorter ones with rows of NA until they match the tallest input.
#
# Args:
#   ...: objects that as.matrix() accepts (vectors, matrices, data frames).
# Returns:
#   A matrix with all inputs side by side, or NULL when called with no
#   arguments (the same result cbind() gives for no input).
cbind.fill <- function(...) {
  mats <- lapply(list(...), as.matrix)
  if (length(mats) == 0L) {
    return(NULL)
  }
  # vapply() always yields an integer vector; sapply() returns a list for
  # empty input, which max() cannot handle.
  target_rows <- max(vapply(mats, nrow, integer(1)))
  padded <- lapply(mats, function(m) {
    # A zero-row filler is produced when m is already tall enough.
    rbind(m, matrix(NA, target_rows - nrow(m), ncol(m)))
  })
  do.call(cbind, padded)
}
# Three one-column data frames of different heights (6, 5 and 3 rows).
# The bare numbers 23 and 45 end up as character because c() coerces
# mixed input to a single type.
a <- data.frame(c("Pen", "Pen", "Pen", "Ryu", "Ryu", "Ken"))
b <- data.frame(c("banana", "apple", 23, "Carrot", "grape"))
c <- data.frame(c("ryu", 45, "ynwa"))
# Pad to a common height, bind side by side, and label the columns.
final <- setNames(data.frame(cbind.fill(a, b, c)), c("A", "B", "C"))
A B C #This is my sample data set
1 Pen banana ryu
2 Pen apple 45
3 Pen 23 ynwa
4 Ryu Carrot <NA>
5 Ryu grape <NA>
6 Ken <NA> <NA>
#### 预期输出要求:我需要将上述数据框拆分为 3 个数据框,如下所示:
A B C #This is my 1st data frame
1 Pen banana ryu
2 Pen apple 45
3 Pen 23 ynwa
A B C #This is my 2nd data frame
4 Ryu Carrot <NA>
5 Ryu grape <NA>
A B C #This is my 3rd data frame
6 Ken <NA> <NA>
######## 到目前为止我尝试过以下方法:
> final[final=="Pen",]
#when I subset "Pen", Now i have to remove the NA
A B C
1 Pen banana ryu
2 Pen apple 45
3 Pen 23 ynwa
NA <NA> <NA> <NA>
NA.1 <NA> <NA> <NA>
NA.2 <NA> <NA> <NA>
NA.3 <NA> <NA> <NA>
> final_pen <- final[complete.cases(final=="Pen"),]
#I use complete.cases to remove NA, and this looks exactly how i want, I move onto RYU
# NOTE(review): complete.cases() only tests for NA, so this keeps every row that has no NA in any column; it equals the "Pen" rows here only because rows 1-3 are the only complete rows in the sample.
A B C
1 Pen banana ryu
2 Pen apple 45
3 Pen 23 ynwa
> final_ryu <- final[final=="Ryu",]
#I subset Ryu
# NOTE(review): final=="Ryu" compares every cell, not just column A, and the result is NA wherever final is NA; each NA in the row index produces one all-NA row in the output below.
A B C
4 Ryu Carrot <NA>
5 Ryu grape <NA>
NA <NA> <NA> <NA>
NA.1 <NA> <NA> <NA>
NA.2 <NA> <NA> <NA>
NA.3 <NA> <NA> <NA>
现在,如果我在这里使用 complete.cases,整个数据框都会消失,因为这里的每一行都至少有一个 NA。我期望的输出如下:
A B
4 Ryu Carrot
5 Ryu grape
我不想用硬编码的方式逐个取子集,因为我要对大量数据执行此操作,并使用循环将大数据框拆分为多个数据框。请帮忙。这是我发的帖子,我还在学习如何使用本站。因此,如果您认为这是一个愚蠢的问题,请不要投反对票。
从示例来看,您似乎希望将数据框拆分为多个子数据框,然后从每个子数据框中删除所有值均为 NA 的列。可以尝试如下做法。
您需要使用列表来保存新创建的数据框。
# Sample data frame: three groups (a1, a2, a3) of five rows each.
# Column2 is entirely NA for group a3 and Column3 is entirely NA for
# group a1, so the groups differ in which columns are all NA.
df <- data.frame(
  Column1 = rep(paste0("a", 1:3), each = 5),
  Column2 = c(rep(1:2, 5), rep(NA, 5)),
  Column3 = c(rep(NA, 5), rep(1:2, 5))
)
> df
Column1 Column2 Column3
1 a1 1 NA
2 a1 2 NA
3 a1 1 NA
4 a1 2 NA
5 a1 1 NA
6 a2 2 1
7 a2 1 2
8 a2 2 1
9 a2 1 2
10 a2 2 1
11 a3 NA 2
12 a3 NA 1
13 a3 NA 2
14 a3 NA 1
15 a3 NA 2
# First, split on the 1st column: one sub data frame per distinct value,
# kept in order of first appearance.
uniquevals <- unique(df$Column1)
dflist <- lapply(uniquevals, function(val) {
  # %in% never yields NA, so a missing key cannot inject all-NA rows into
  # another group the way `==` does. drop = FALSE always keeps a data frame.
  df[df$Column1 %in% val, , drop = FALSE]
})
> dflist
[[1]]
Column1 Column2 Column3
1 a1 1 NA
2 a1 2 NA
3 a1 1 NA
4 a1 2 NA
5 a1 1 NA
[[2]]
Column1 Column2 Column3
6 a2 2 1
7 a2 1 2
8 a2 2 1
9 a2 1 2
10 a2 2 1
[[3]]
Column1 Column2 Column3
11 a3 NA 2
12 a3 NA 1
13 a3 NA 2
14 a3 NA 1
15 a3 NA 2
# Next, drop from every sub data frame the columns whose values are all NA.
newlist <- lapply(dflist, function(sub_df) {
  # Iterate over columns directly; apply() would first coerce the data
  # frame to a matrix.
  keep <- vapply(sub_df, function(col) !all(is.na(col)), logical(1))
  # drop = FALSE keeps a data frame even when only one column survives.
  sub_df[, keep, drop = FALSE]
})
> newlist
[[1]]
Column1 Column2
1 a1 1
2 a1 2
3 a1 1
4 a1 2
5 a1 1
[[2]]
Column1 Column2 Column3
6 a2 2 1
7 a2 1 2
8 a2 2 1
9 a2 1 2
10 a2 2 1
[[3]]
Column1 Column3
11 a3 2
12 a3 1
13 a3 2
14 a3 1
15 a3 2
完成!!