如何将一行中 NA 的第一个实例替换为 R 中的给定值?

  • 本文关键字:替换 实例 第一个 NA 一行 r
  • 更新时间 :
  • 英文 :


我想找出每一行中的第一个 #N/A,并将其替换为特定值(即该行 Value 列的值)。如何逐行识别这种模式?

下面是我的数据的样子。

Value,   C1,   C2,   C3,   C4,   C5,   C6,   C7
100,   13,   14,   15, #N/A, #N/A, #N/A, #N/A
200,   13, #N/A, #N/A, #N/A, #N/A, #N/A, #N/A
350,   12,   23,   43,   45,   67, #N/A, #N/A

以下是我想要的结果:

Value,   C1,   C2,   C3,   C4,   C5,   C6,   C7
100,   13,   14,   15,  100, #N/A, #N/A, #N/A
200,   13,  200, #N/A, #N/A, #N/A, #N/A, #N/A
350,   12,   23,   43,   45,   67,  350, #N/A

下面是一个各行含有 NA 的示例 data.frame:

# Build a 10 x 10 example data.frame whose cells are drawn at random from
# 1:4 and NA. No seed is set, so the values differ between runs.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df <- data.frame(
  matrix(sample(c(1:4, NA), size = 100, replace = TRUE), nrow = 10)
)
df
#    X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
# 1   3 NA  4  2  4  3  2  4  3   1
# 2   2  2  3  1  1 NA  4  3 NA   3
# 3  NA  1  1  2  3  2  2  4  2  NA
# 4   4  2  3  3 NA  4  2  2  4  NA
# 5   2  3 NA  1  4  4  4  2  4   2
# 6   4  4  2 NA  2  4 NA  4 NA   4
# 7   3  3  3  3 NA NA  3  2  1  NA
# 8   3  3 NA  1  4  4  2  4  3  NA
# 9   3 NA NA  2  3  2  4  3  4   2
# 10  2  2  4 NA  4  1  3 NA  1   2

现在要将第 1 行的第一个 NA 替换为 101,将第 2 行的第一个 NA 替换为 102,依此类推,可以这样做:

# Replace the first NA of row i with 100 + i (101:110 for a 10-row df).
na_mask <- is.na(df)
# which.max() on a logical row gives the position of its first TRUE, i.e. the
# column of the first NA in that row.
inds <- cbind(seq_len(nrow(df)), apply(na_mask, 1, which.max))
# which.max() also returns 1 for a row that has no NA at all; drop those rows
# so that column 1 is not overwritten by mistake.
has_na <- rowSums(na_mask) > 0
inds <- inds[has_na, , drop = FALSE]
# Skip the assignment when nothing qualifies: a zero-length replacement makes
# the matrix-index assignment on a data.frame fail.
if (any(has_na)) {
  df[inds] <- (100L + seq_len(nrow(df)))[has_na]
}
df
#     X1  X2  X3  X4  X5  X6 X7 X8 X9 X10
# 1    3 101   4   2   4   3  2  4  3   1
# 2    2   2   3   1   1 102  4  3 NA   3
# 3  103   1   1   2   3   2  2  4  2  NA
# 4    4   2   3   3 104   4  2  2  4  NA
# 5    2   3 105   1   4   4  4  2  4   2
# 6    4   4   2 106   2   4 NA  4 NA   4
# 7    3   3   3   3 107  NA  3  2  1  NA
# 8    3   3 108   1   4   4  2  4  3  NA
# 9    3 109  NA   2   3   2  4  3  4   2
# 10   2   2   4 110   4   1  3 NA  1   2

下面是一个使用 max.col 的向量化方案:

# Replace the first NA of each row (ignoring column 1, Value) with that row's
# Value.
na_mask <- is.na(df[-1])
# Column, counted within df[-1], of the first NA per row. max.col() reports a
# column even for rows without any NA, so those rows are zeroed out.
j1 <- max.col(na_mask, ties.method = "first") * (rowSums(na_mask) != 0)
ind <- j1 != 0
# Skip the assignment when no row qualifies: a zero-length replacement makes
# the matrix-index assignment on a data.frame fail.
if (any(ind)) {
  # drop = FALSE keeps a two-column (row, column) matrix even when a single
  # row qualifies; a bare vector would be interpreted as column numbers.
  first_na <- cbind(seq_len(nrow(df)), j1)[ind, , drop = FALSE]
  df[-1][first_na] <- df[["Value"]][ind]
}
df
#   Value  X1  X2 X3  X4 X5  X6  X7 X8 X9 X10
#1    100   3   2  1 100  4  NA   3  4 NA   4
#2    200   4   2  3   3  1 200   1 NA  1   1
#3    350   3   2  2   4  2   1 350  3 NA   3
#4    200   2   1  2   2  4   2   1  1  1   2
#5    150 150   1  3   4  3   1   4  3  1   2
#6    100   2   2  1   2  1   2   1  1  1   1
#7    120 120  NA  4   3  3   4   3 NA  3   2
#8    180   2   3  2   4  3   2   2  2  2   1
#9    100   2 100  2   3  4   3   1  2  1   1
#10   300   1 300 NA   2  1   1   2  1  2   3

数据

# Example data: Value holds the replacement for each row (double), and X1..X10
# are integer columns containing some NAs.
df <- data.frame(
  Value = c(100, 200, 350, 200, 150, 100, 120, 180, 100, 300),
  X1 = c(3L, 4L, 3L, 2L, NA, 2L, NA, 2L, 2L, 1L),
  X2 = c(2L, 2L, 2L, 1L, 1L, 2L, NA, 3L, NA, NA),
  X3 = c(1L, 3L, 2L, 2L, 3L, 1L, 4L, 2L, 2L, NA),
  X4 = c(NA, 3L, 4L, 2L, 4L, 2L, 3L, 4L, 3L, 2L),
  X5 = c(4L, 1L, 2L, 4L, 3L, 1L, 3L, 3L, 4L, 1L),
  X6 = c(NA, NA, 1L, 2L, 1L, 2L, 4L, 2L, 3L, 1L),
  X7 = c(3L, 1L, NA, 1L, 4L, 1L, 3L, 2L, 1L, 2L),
  X8 = c(4L, NA, 3L, 1L, 3L, 1L, NA, 2L, 2L, 1L),
  X9 = c(NA, 1L, NA, 1L, 1L, 1L, 3L, 2L, 1L, 2L),
  X10 = c(4L, 1L, 3L, 2L, 2L, 1L, 2L, 1L, 1L, 3L)
)

最新更新