在 R 中将 NA 值替换为前后行值的平均值

  • 本文关键字:平均值 NA 替换 r
  • 更新时间 :
  • 英文 :


我想写一段代码,把所有 NA 值替换为其前一行和后一行值的平均值。下面的代码只适用于单个列。有没有办法让这段代码作用于数据集的所有列,而不必把列名逐个写进代码里?

# Locate the missing WTI entries once, then overwrite each one with the mean
# of the values in the row directly before and the row directly after it.
na_idx <- which(is.na(data$WTI))
data$WTI[na_idx] <- rowMeans(cbind(data$WTI[na_idx - 1], data$WTI[na_idx + 1]))

我的数据是这样的:

> dput(head(data))
structure(list(Timestamp = structure(c(1629417600, 1629331200, 
1629244800, 1629158400, 1629072000, 1628812800), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), USDTRY = c(8.4852, 8.4939, 8.4485, 8.4284, 8.453, 
8.5171), EURTRY = c(9.9325, 9.9311, 9.8916, 9.8746, 9.9618, 10.0539
), EURUSD = c(1.1696, 1.1674, 1.171, 1.1708, 1.1777, 1.1791), 
BIST100 = c(1444.63, 1439.86, 1449.59, 1461.69, 1455.25, 
1447.64), TR2YT = c(18.01, 18.01, 18.01, 18.01, 18.01, 18.15
), TR10YT = c(16.88, 16.87, 16.79, 16.8, 16.69, 16.77), TR_EURBON_2 = c(3.648673, 
3.63085, 3.611969, 3.572728, 3.567871, 3.559959), TR_EURBON_10 = c(6.302608, 
6.307343, 6.276473, 6.240502, 6.255035, 6.301358), BRENT = c(65.18, 
66.45, 68.23, 69.03, 69.51, 70.59), WTI = c(62.32, 63.69, 
65.46, 66.59, 67.29, 68.44), Altın = c(1780.8668, 1780.179, 
1787.59, 1785.9556, 1787.2383, 1779.1515), Gümüş = c(23.01, 
23.23, 23.4805, 23.6351, 23.8235, 23.74)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

谢谢。

一种方法是使用 dplyr 的 lead 和 lag:

library(dplyr)

# For every numeric column of `data`, replace each NA with the mean of the
# value in the next row (lead) and the previous row (lag); non-missing values
# are kept as they are. If either neighbour is itself missing (first/last row,
# or two NAs in a row), the sum is NA and the entry therefore stays NA.
data %>%
  mutate(
    across(
      where(is.numeric),
      ~ if_else(is.na(.), (dplyr::lead(.) + dplyr::lag(.)) / 2, .)
    )
  )

mutate 用于修改现有列,across 选取 is.numeric 返回 TRUE 的列。~if_else(is.na(.), (dplyr::lead(.) + dplyr::lag(.)) / 2, .) 检查列中的当前值是否为 NA:如果是,则将其替换为前一行与后一行值的平均值;否则保持原值不变。

试试这个,

set.seed(42)
dat <- as.data.frame(matrix(sample(c(NA, 1:9), size = 35, replace = TRUE), ncol = 7))
dat
#   V1 V2 V3 V4 V5 V6 V7
# 1 NA  3  6  9  3  7  7
# 2  4  1  3  1  4  2  5
# 3 NA  9  8  2  4  9  9
# 4  8 NA  4  8  3 NA  7
# 5  9  7  3  8  1  9  3
# Replace each NA with the mean of the values directly above and below it,
# one column at a time. Assigning into `dat[]` keeps `dat` a data frame with
# its original column names.
dat[] <- lapply(dat, function(z) {
  # Leave non-numeric columns (timestamps, text, factors) untouched: cbind()
  # would drop their class and rowMeans() requires numeric input.
  if (!is.numeric(z)) {
    return(z)
  }
  # Three columns per row: previous row's value, current value, next row's value.
  mtx <- cbind(c(NA, head(z, -1)), z, c(tail(z, -1), NA))
  # Strict rule: a missing value is only filled when BOTH neighbours exist.
  # If the current value is NA and a neighbour is missing too, blank the whole
  # row so that its mean cannot be computed from a single neighbour.
  mtx[is.na(mtx[, 2]) & rowSums(is.na(mtx)) > 1, ] <- NA
  out <- ifelse(is.na(mtx[, 2]), rowMeans(mtx, na.rm = TRUE), mtx[, 2])
  # rowMeans(..., na.rm = TRUE) of an all-NA row is NaN; report it as NA.
  out[is.nan(out)] <- NA
  out
})
dat
#   V1 V2 V3 V4 V5 V6 V7
# 1 NA  3  6  9  3  7  7
# 2  4  1  3  1  4  2  5
# 3  6  9  8  2  4  9  9
# 4  8  8  4  8  3  9  7
# 5  9  7  3  8  1  9  3

如果你希望 V1[1] 也被更新(尽管它没有"前一个"值),那么删除 mtx[...] <- NA 这条赋值语句即可:

# fresh dat (start again from the original `dat`, before any imputation)
# Lenient rule: a missing value is replaced by the mean of whichever
# neighbours are available, so an NA in the first or last row takes the value
# of its single neighbour.
dat[] <- lapply(dat, function(z) {
  # Leave non-numeric columns (timestamps, text, factors) untouched: cbind()
  # would drop their class and rowMeans() requires numeric input.
  if (!is.numeric(z)) {
    return(z)
  }
  # Three columns per row: previous row's value, current value, next row's value.
  mtx <- cbind(c(NA, head(z, -1)), z, c(tail(z, -1), NA))
  out <- ifelse(is.na(mtx[, 2]), rowMeans(mtx, na.rm = TRUE), mtx[, 2])
  # When the value and both neighbours are missing the mean is NaN; report NA.
  out[is.nan(out)] <- NA
  out
})
dat
#   V1 V2 V3 V4 V5 V6 V7
# 1  4  3  6  9  3  7  7
# 2  4  1  3  1  4  2  5
# 3  6  9  8  2  4  9  9
# 4  8  8  4  8  3  9  7
# 5  9  7  3  8  1  9  3

最新更新