在 R 中子集和拆分数据框的问题(使用 cbind.fill 和 rbind.fill 创建)


##### Code to generate the sample DF
# Column-bind objects that have different numbers of rows, padding the
# shorter ones with NA rows so that all of them reach the same height.
#
# Args:
#   ...: Objects that as.matrix() can convert (data frames, matrices, vectors).
#
# Returns:
#   A matrix with as many rows as the tallest input and the inputs' columns
#   side by side; NULL when called with no arguments (same as cbind()).
cbind.fill <- function(...) {
  mats <- lapply(list(...), as.matrix)

  # Nothing to bind: return NULL instead of failing inside max().
  if (length(mats) == 0L) {
    return(NULL)
  }

  # vapply() always yields an integer vector, unlike sapply().
  n <- max(vapply(mats, nrow, integer(1)))

  # Append the missing rows (filled with NA) below each input.
  padded <- lapply(mats, function(x) {
    rbind(x, matrix(NA, nrow = n - nrow(x), ncol = ncol(x)))
  })

  do.call(cbind, padded)
}
# Three single-column data frames of unequal length (6, 5 and 3 rows).
a <- data.frame(c("Pen", "Pen", "Pen", "Ryu", "Ryu", "Ken"))
b <- data.frame(c("banana", "apple", 23, "Carrot", "grape"))
c <- data.frame(c("ryu", 45, "ynwa"))
# Bind them side by side with cbind.fill() and name the columns A, B, C.
final <- setNames(data.frame(cbind.fill(a, b, c)), c("A", "B", "C"))
    A      B    C           #This is my sample data set
1 Pen banana  ryu
2 Pen  apple   45
3 Pen     23 ynwa
4 Ryu Carrot <NA>
5 Ryu  grape <NA>
6 Ken   <NA> <NA>
#### 预期输出

要求:我需要将上述数据框拆分为 3 个数据框,如下所示:

    A      B    C           #This is my 1st data frame
1 Pen banana  ryu
2 Pen  apple   45
3 Pen     23 ynwa
    A      B    C           #This is my 2nd data frame
4 Ryu Carrot <NA>
5 Ryu  grape <NA>
    A      B    C           #This is my 3rd data frame
6 Ken   <NA> <NA>
######## 到目前为止我尝试过以下方法
> final[final=="Pen",]
        #when I subset "Pen", Now i have to remove the NA
        A      B    C
1     Pen banana  ryu
2     Pen  apple   45
3     Pen     23 ynwa
NA   <NA>   <NA> <NA>
NA.1 <NA>   <NA> <NA>
NA.2 <NA>   <NA> <NA>
NA.3 <NA>   <NA> <NA>
> final_pen <- final[complete.cases(final=="Pen"),]
    #I use complete.cases to remove NA, and this looks exactly how i want, I move onto RYU
    A      B    C
1 Pen banana  ryu
2 Pen  apple   45
3 Pen     23 ynwa

> final_ryu <- final[final=="Ryu",] 
    #I subset Ryu
        A      B    C
4     Ryu Carrot <NA>
5     Ryu  grape <NA>
NA   <NA>   <NA> <NA>
NA.1 <NA>   <NA> <NA>
NA.2 <NA>   <NA> <NA>
NA.3 <NA>   <NA> <NA>

现在,当我在这里使用 complete.cases 时,整个数据框都消失了,因为这里每一行都含有 NA(C 列全为 NA)。我期望的输出如下:

        A      B
4     Ryu Carrot 
5     Ryu  grape 

我不想通过硬编码来取子集,因为我要对大量数据执行此操作,并用循环把一个大数据框拆分成多个数据框。请帮帮忙。这是我发的帖子,我还在学习如何熟悉这里的用法。因此,即使您认为这是一个愚蠢的问题,也请不要投反对票。

从示例来看,您似乎希望把数据框拆分成多个子数据框,然后从每个子数据框中删除值全部为 NA 的列。可以试试下面的做法。

您必须使用列表来维护创建的新数据框。

# Sample data frame: 15 rows in three groups (a1, a2, a3) of five rows each.
# Column2 is NA for the last five rows; Column3 is NA for the first five.
> df = data.frame( Column1 = paste0('a',c(rep(1,5),rep(2,5),rep(3,5))), Column2 = c(rep(1:2, 5), rep(NA,5)), Column3 = c(rep(NA,5),rep(1:2,5))  )
> df
   Column1 Column2 Column3
1       a1       1      NA
2       a1       2      NA
3       a1       1      NA
4       a1       2      NA
5       a1       1      NA
6       a2       2       1
7       a2       1       2
8       a2       2       1
9       a2       1       2
10      a2       2       1
11      a3      NA       2
12      a3      NA       1
13      a3      NA       2
14      a3      NA       1
15      a3      NA       2
#First. Lets Split on 1st Column.
> # Build one sub-data-frame per distinct value of Column1, in order of
> # first appearance. seq_along() makes the loop a no-op for empty input,
> # and the result list is preallocated rather than grown.
> uniquevals <- unique(df$Column1)
> dflist <- vector("list", length(uniquevals))
> for (i in seq_along(uniquevals)) {
+     dflist[[i]] <- df[df$Column1 == uniquevals[i], ]
+ }
> dflist
[[1]]
  Column1 Column2 Column3
1      a1       1      NA
2      a1       2      NA
3      a1       1      NA
4      a1       2      NA
5      a1       1      NA
[[2]]
   Column1 Column2 Column3
6       a2       2       1
7       a2       1       2
8       a2       2       1
9       a2       1       2
10      a2       2       1
[[3]]
   Column1 Column2 Column3
11      a3      NA       2
12      a3      NA       1
13      a3      NA       2
14      a3      NA       1
15      a3      NA       2
#Next - Let's remove all columns where all values are NA
> newlist <- lapply(dflist, function(d) {
+     # Keep a column unless every one of its values is NA.
+     keep <- vapply(d, function(col) !all(is.na(col)), logical(1))
+     # drop = FALSE keeps a data frame even if only one column survives.
+     d[, keep, drop = FALSE]
+ })
> newlist
[[1]]
  Column1 Column2
1      a1       1
2      a1       2
3      a1       1
4      a1       2
5      a1       1
[[2]]
   Column1 Column2 Column3
6       a2       2       1
7       a2       1       2
8       a2       2       1
9       a2       1       2
10      a2       2       1
[[3]]
   Column1 Column3
11      a3       2
12      a3       1
13      a3       2
14      a3       1
15      a3       2

完成!!

最新更新