r-如何用同一数据集中其他相应多列的值替换多列中的NA

我正在尝试用df中其他列的相应值替换多个列上的NA。


df = data.frame(ID = sample(1000:9999,10),
Age = sample(18:99,10),
Gender = sample(c("M","F"),10, replace = TRUE),              
Test1 = sample(60:100,10),
Test2 = sample(60:100,10),
Test3 = sample(60:100,10),
Test1.x = rep(NA,10),
Test2.x = rep(NA,10),
Test3.x = rep(NA,10))
df$Test1[c(2,3,8)] = NA
df$Test2[c(4,10)] = NA
df$Test3[c(1,7)] = NA
df$Test1.x[c(2,3,4,8)] = sample(60:100,4)
df$Test2.x[c(4,9,10)] = sample(60:100,3)
df$Test3.x[c(1,6,7)] = sample(60:100,3)
print(df)
ID Age Gender Test1 Test2 Test3 Test1.x Test2.x Test3.x
1  7877  40      M    78    70    NA      NA      NA      84
2  6345  54      F    NA    99    61      62      NA      NA
3  9170  41      F    NA    80    96      82      NA      NA
4  2400  83      M   100    NA   100      94      95      NA
5  5920  66      M    77    62    69      NA      NA      NA
6  2569  34      M    99    96    81      NA      NA     100
7  7879  28      M    64    71    NA      NA      NA      90
8  8652  53      F    NA    74    89      95      NA      NA
9  6357  97      F    92    86    83      NA      86      NA
10 1943  45      M    95    NA    98      NA      72      NA

在使用str_replace时，我只想用相应的test.x分数替换测试分数中的NA。我的实际数据帧包含3个以上的列，但所有对应的列名都与"相同；。x〃；之后

有什么想法可以让这件事变得简单快捷吗？我正在努力在所述列之间进行变异或使用replace_nas。

在dplyr中，我们可以将coalesce与across一起使用。

library(dplyr)
df |>
mutate(across(starts_with("Test") & !ends_with(".x"),
~ coalesce(., get(paste0(cur_column(), ".x")))))

输出：

ID Age Gender Test1 Test2 Test3 Test1.x Test2.x Test3.x
1  5022  90      M    94    68    79      NA      NA      79
2  1625  41      M    71    66    89      71      NA      NA
3  6438  86      M    86    94    94      86      NA      NA
4  3249  93      F    74    90    76      68      90      NA
5  7338  70      F    64    63    70      NA      NA      NA
6  9416  27      F    78    74    75      NA      NA      64
7  4374  45      F    82   100    60      NA      NA      60
8  6226  21      F    61    82    63      61      NA      NA
9  5265  97      M    83    83    68      NA      89      NA
10 5441  95      M    70    79    99      NA      79      NA

更新2/9:

为了允许使用其他变量名称，我们可以使用特定的解决方案或更通用的解决方案：

特定：

df |>
mutate(across(c(HW, Exam, Final) & !ends_with(".x"),
~ coalesce(., get(paste0(cur_column(), ".x")))))

概述：

df |>
mutate(across(ends_with(".x"),
~ coalesce(get(sub("\.x", "", cur_column())), .)))

新输出：

ID Age Gender HW Exam Final HW.x Exam.x Final.x
1  5166  80      F 60   79    NA   60     79      64
2  3375  35      M NA   88    72   65     88      72
3  5722  19      F NA   65    75   81     65      75
4  3701  27      M 89   NA    61   89     89      61
5  1424  67      F 69   94    91   69     94      91
6  1407  20      F 75   72    66   75     72      66
7  2927  39      M 63   82    NA   63     82      86
8  7315  90      F NA   92    79   70     92      79
9  7420  76      F 87   83    87   87     83      87
10 9334  73      F 86   NA    64   86     82      64

新数据：

df = data.frame(ID = sample(1000:9999,10),
Age = sample(18:99,10),
Gender = sample(c("M","F"),10, replace = TRUE),              
HW = sample(60:100,10),
Exam = sample(60:100,10),
Final = sample(60:100,10),
HW.x = rep(NA,10),
Exam.x = rep(NA,10),
Final.x = rep(NA,10))
df$HW[c(2,3,8)] = NA
df$Exam[c(4,10)] = NA
df$Final[c(1,7)] = NA
df$HW.x[c(2,3,4,8)] = sample(60:100,4)
df$Exam.x[c(4,9,10)] = sample(60:100,3)
df$Final.x[c(1,6,7)] = sample(60:100,3)

使用dplyover

library(dplyover)
df <- df %>% 
mutate(across2(matches("Test\d+$"), ends_with(".x"),
coalesce, .names = "{xcol}"))

-输出

df
ID Age Gender Test1 Test2 Test3 Test1.x Test2.x Test3.x
1  7877  40      M    78    70    84      NA      NA      84
2  6345  54      F    62    99    61      62      NA      NA
3  9170  41      F    82    80    96      82      NA      NA
4  2400  83      M   100    95   100      94      95      NA
5  5920  66      M    77    62    69      NA      NA      NA
6  2569  34      M    99    96    81      NA      NA     100
7  7879  28      M    64    71    90      NA      NA      90
8  8652  53      F    95    74    89      95      NA      NA
9  6357  97      F    92    86    83      NA      86      NA
10 1943  45      M    95    72    98      NA      72      NA

相关内容

最新更新

热门标签：