我正在尝试用df中其他列的相应值替换多个列上的NA。
# Example data: ten rows with three score columns (Test1-Test3) and three
# companion ".x" columns that start out entirely NA.
df <- data.frame(
  ID = sample(1000:9999, 10),
  Age = sample(18:99, 10),
  Gender = sample(c("M", "F"), 10, replace = TRUE),
  Test1 = sample(60:100, 10),
  Test2 = sample(60:100, 10),
  Test3 = sample(60:100, 10),
  Test1.x = rep(NA, 10),
  Test2.x = rep(NA, 10),
  Test3.x = rep(NA, 10)
)

# Blank out a few scores in the main columns.
df[["Test1"]][c(2, 3, 8)] <- NA
df[["Test2"]][c(4, 10)] <- NA
df[["Test3"]][c(1, 7)] <- NA

# Fill the companion columns at selected rows with random scores.
df[["Test1.x"]][c(2, 3, 4, 8)] <- sample(60:100, 4)
df[["Test2.x"]][c(4, 9, 10)] <- sample(60:100, 3)
df[["Test3.x"]][c(1, 6, 7)] <- sample(60:100, 3)

print(df)
ID Age Gender Test1 Test2 Test3 Test1.x Test2.x Test3.x
1 7877 40 M 78 70 NA NA NA 84
2 6345 54 F NA 99 61 62 NA NA
3 9170 41 F NA 80 96 82 NA NA
4 2400 83 M 100 NA 100 94 95 NA
5 5920 66 M 77 62 69 NA NA NA
6 2569 34 M 99 96 81 NA NA 100
7 7879 28 M 64 71 NA NA NA 90
8 8652 53 F NA 74 89 95 NA NA
9 6357 97 F 92 86 83 NA 86 NA
10 1943 45 M 95 NA 98 NA 72 NA
在使用str_replace时,我只想用相应的Test.x分数替换测试分数中的NA。我的实际数据框不止3列,但所有对应列的列名都相同,只是末尾多了".x"后缀。
有什么办法可以让这件事变得简单快捷吗?我一直没能成功地对这些列使用mutate/across或replace_na。
在dplyr中,我们可以将coalesce与across一起使用。
library(dplyr)

# For every "Test*" column that is not itself a ".x" companion, fill its NAs
# from the companion column named "<column>.x".
# `.data[[...]]` looks the companion up among the data frame's columns only;
# `get()` would also search enclosing environments and could silently pick up
# an unrelated object when the companion column is missing.
df |>
  mutate(across(
    starts_with("Test") & !ends_with(".x"),
    ~ coalesce(.x, .data[[paste0(cur_column(), ".x")]])
  ))
输出:
ID Age Gender Test1 Test2 Test3 Test1.x Test2.x Test3.x
1 5022 90 M 94 68 79 NA NA 79
2 1625 41 M 71 66 89 71 NA NA
3 6438 86 M 86 94 94 86 NA NA
4 3249 93 F 74 90 76 68 90 NA
5 7338 70 F 64 63 70 NA NA NA
6 9416 27 F 78 74 75 NA NA 64
7 4374 45 F 82 100 60 NA NA 60
8 6226 21 F 61 82 63 61 NA NA
9 5265 97 M 83 83 68 NA 89 NA
10 5441 95 M 70 79 99 NA 79 NA
更新2/9:
为了允许使用其他变量名称,我们可以使用特定的解决方案或更通用的解决方案:
特定:
# Explicitly listed score columns: fill each column's NAs from its
# "<column>.x" companion.
# `.data[[...]]` restricts the lookup to columns of the data frame, whereas
# `get()` would fall through to enclosing environments if the companion
# column did not exist.
df |>
  mutate(across(
    c(HW, Exam, Final) & !ends_with(".x"),
    ~ coalesce(.x, .data[[paste0(cur_column(), ".x")]])
  ))
通用:
# Generic variant: iterate over the ".x" companion columns, strip the suffix
# to find the matching base column, and take the base value where present,
# falling back to the companion value otherwise.
# The pattern must be written "\\.x$": a single backslash before "." is not a
# valid escape in an R string literal, and the "$" anchor ensures only the
# trailing suffix is removed.
# NOTE: across() writes the result into the ".x" columns it iterates over; the
# base columns themselves are left unchanged.
df |>
  mutate(across(
    ends_with(".x"),
    ~ coalesce(.data[[sub("\\.x$", "", cur_column())]], .x)
  ))
新输出:
ID Age Gender HW Exam Final HW.x Exam.x Final.x
1 5166 80 F 60 79 NA 60 79 64
2 3375 35 M NA 88 72 65 88 72
3 5722 19 F NA 65 75 81 65 75
4 3701 27 M 89 NA 61 89 89 61
5 1424 67 F 69 94 91 69 94 91
6 1407 20 F 75 72 66 75 72 66
7 2927 39 M 63 82 NA 63 82 86
8 7315 90 F NA 92 79 70 92 79
9 7420 76 F 87 83 87 87 83 87
10 9334 73 F 86 NA 64 86 82 64
新数据:
# Second example data set: same layout as before, but the score columns are
# named HW, Exam and Final, each with an initially all-NA ".x" companion.
df <- data.frame(
  ID = sample(1000:9999, 10),
  Age = sample(18:99, 10),
  Gender = sample(c("M", "F"), 10, replace = TRUE),
  HW = sample(60:100, 10),
  Exam = sample(60:100, 10),
  Final = sample(60:100, 10),
  HW.x = rep(NA, 10),
  Exam.x = rep(NA, 10),
  Final.x = rep(NA, 10)
)

# Blank out a few scores in the main columns.
df[["HW"]][c(2, 3, 8)] <- NA
df[["Exam"]][c(4, 10)] <- NA
df[["Final"]][c(1, 7)] <- NA

# Fill the companion columns at selected rows with random scores.
df[["HW.x"]][c(2, 3, 4, 8)] <- sample(60:100, 4)
df[["Exam.x"]][c(4, 9, 10)] <- sample(60:100, 3)
df[["Final.x"]][c(1, 6, 7)] <- sample(60:100, 3)
使用dplyover
library(dplyover)

# Pair each "Test<digits>" column with its ".x" companion and coalesce them,
# storing the result under the first column's name ("{xcol}").
# The digit class must be written "\\d": a single backslash before "d" is not
# a valid escape in an R string literal and the code would fail to parse.
df <- df %>%
  mutate(across2(
    matches("Test\\d+$"), ends_with(".x"),
    coalesce,
    .names = "{xcol}"
  ))
-输出
df
ID Age Gender Test1 Test2 Test3 Test1.x Test2.x Test3.x
1 7877 40 M 78 70 84 NA NA 84
2 6345 54 F 62 99 61 62 NA NA
3 9170 41 F 82 80 96 82 NA NA
4 2400 83 M 100 95 100 94 95 NA
5 5920 66 M 77 62 69 NA NA NA
6 2569 34 M 99 96 81 NA NA 100
7 7879 28 M 64 71 90 NA NA 90
8 8652 53 F 95 74 89 95 NA NA
9 6357 97 F 92 86 83 NA 86 NA
10 1943 45 M 95 72 98 NA 72 NA