r语言 - 如何根据列表中数据帧中的列数据删除NAs ?



我有一个列表(my.list),看起来像这样:

$S1
Study_ID   B   C         D
1      100  NA  C1 0.9124000
2      100 1.5 PTA        NA
3      200 1.8  C1 0.5571429
4      200 2.1 PTA 0.7849462
5      300 3.2  C1 0.3271900
6      300 1.4 PTA        NA
7      400  NA  C1 0.8248200
8      400 9.3 PTA 0.2847020
$S2
Study_ID    B   C         D
1      100   NA  C1 0.9124000
2      100 0.70 PTA        NA
3      200   NA  C1 0.5571429
4      200 0.45 PTA 0.7849462
5      300 0.91  C1 0.3271900
6      300 0.78 PTA 0.6492000
7      400 0.65  C1 0.8248200
8      400   NA PTA        NA

如果某位患者在D列中有NA,我想把该患者从列表中整个删除,即根据Study_ID删除该患者的所有行。

换句话说,如果在D列中有一个NA,我想删除具有相同Study_ID的两行。

我想要的输出是这样的:

$S1
Study_ID   B   C         D
1      200 1.8  C1 0.5571429
2      200 2.1 PTA 0.7849462
3      400  NA  C1 0.8248200
4      400 9.3 PTA 0.2847020
$S2
Study_ID    B   C         D
1      200   NA  C1 0.5571429
2      200 0.45 PTA 0.7849462
3      300 0.91  C1 0.3271900
4      300 0.78 PTA 0.6492000

我该怎么做呢?

可重现的数据:

# Reproducible input: a named list of two data frames (S1 and S2). Each has
# eight rows, two per Study_ID, with column C alternating "C1" / "PTA" and
# NAs scattered through B and D. Row names are the character strings "1".."8".
my.list <- list(
  S1 = structure(
    list(
      Study_ID = rep(c(100, 200, 300, 400), each = 2),
      B = c(NA, 1.5, 1.8, 2.1, 3.2, 1.4, NA, 9.3),
      C = rep(c("C1", "PTA"), times = 4),
      D = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, NA, 0.82482, 0.284702)
    ),
    class = "data.frame",
    row.names = as.character(1:8)
  ),
  S2 = structure(
    list(
      Study_ID = rep(c(100, 200, 300, 400), each = 2),
      B = c(NA, 0.7, NA, 0.45, 0.91, 0.78, 0.65, NA),
      C = rep(c("C1", "PTA"), times = 4),
      D = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, 0.6492, 0.82482, NA)
    ),
    class = "data.frame",
    row.names = as.character(1:8)
  )
)

tidyverse

library(tidyverse)
# For every data frame in the list, keep only the Study_ID groups whose D
# column contains no NA: a single NA in D drops all rows of that Study_ID.
# The grouping is removed again at the end so the returned tibbles are plain,
# ungrouped tables and later verbs do not silently operate per Study_ID.
my.list %>%
  map(function(df) {
    df %>%
      group_by(Study_ID) %>%
      filter(!anyNA(D)) %>%
      ungroup()
  })
#> $S1
#> # A tibble: 4 × 4
#>   Study_ID     B C         D
#>      <dbl> <dbl> <chr> <dbl>
#> 1      200   1.8 C1    0.557
#> 2      200   2.1 PTA   0.785
#> 3      400  NA   C1    0.825
#> 4      400   9.3 PTA   0.285
#> 
#> $S2
#> # A tibble: 4 × 4
#>   Study_ID     B C         D
#>      <dbl> <dbl> <chr> <dbl>
#> 1      200 NA    C1    0.557
#> 2      200  0.45 PTA   0.785
#> 3      300  0.91 C1    0.327
#> 4      300  0.78 PTA   0.649

data.table

library(magrittr)
library(data.table)
# Convert each data frame to a data.table, then keep only the Study_ID groups
# whose D column has no NA (.SD[TRUE] keeps the whole group, .SD[FALSE] none).
# as.data.table() works on a copy, so my.list itself is left as it was;
# setDT() converts by reference and can alter the elements of my.list.
lapply(my.list, as.data.table) %>%
  lapply(function(x) x[, .SD[!anyNA(D)], by = Study_ID])
#> $S1
#>    Study_ID   B   C         D
#> 1:      200 1.8  C1 0.5571429
#> 2:      200 2.1 PTA 0.7849462
#> 3:      400  NA  C1 0.8248200
#> 4:      400 9.3 PTA 0.2847020
#> 
#> $S2
#>    Study_ID    B   C         D
#> 1:      200   NA  C1 0.5571429
#> 2:      200 0.45 PTA 0.7849462
#> 3:      300 0.91  C1 0.3271900
#> 4:      300 0.78 PTA 0.6492000

# Example input: a named list of two data frames (S1 and S2). Study_ID and C
# are the same in both, so a local constructor fills them in and only the B
# and D columns are spelled out per study. Row names are the strings "1".."8".
my.list <- local({
  new_study <- function(b, d) {
    structure(
      list(
        Study_ID = rep(c(100, 200, 300, 400), each = 2),
        B = b,
        C = rep(c("C1", "PTA"), times = 4),
        D = d
      ),
      class = "data.frame",
      row.names = as.character(1:8)
    )
  }

  list(
    S1 = new_study(
      b = c(NA, 1.5, 1.8, 2.1, 3.2, 1.4, NA, 9.3),
      d = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, NA, 0.82482, 0.284702)
    ),
    S2 = new_study(
      b = c(NA, 0.7, NA, 0.45, 0.91, 0.78, 0.65, NA),
      d = c(0.9124, NA, 0.5571429, 0.7849462, 0.32719, 0.6492, 0.82482, NA)
    )
  )
})

@Yuriy答案的小替代:

library(dplyr)
library(purrr)
# For each data frame in my.list: group the rows by Study_ID, keep a group
# only if none of its D values is NA, and return the result ungrouped.
map(my.list, function(study) {
  by_id <- group_by(study, Study_ID)
  complete_only <- filter(by_id, !any(is.na(D)))
  ungroup(complete_only)
})

In base R:

# Base R: for each data frame in my.list, drop every row whose Study_ID has at
# least one NA in column D. Returns a list with the same names as my.list.
lapply(my.list, function(x) {
  # Index the Study_ID vector directly instead of using x[rows, "Study_ID"]:
  # that form yields a plain vector only for base data frames, whereas tibbles
  # and data.tables return a one-column table that %in% fails to match
  # against, so no rows would be removed.
  to_remove <- unique(x[["Study_ID"]][is.na(x[["D"]])])
  x[!x[["Study_ID"]] %in% to_remove, ]
})

最新更新