approx 报错:至少需要两个非 NA 值才能进行插值

  • 本文关键字:两个 NA 插值 误差 r
  • 更新时间 :
  • 英文 :


我正在尝试使用 approxfun 对缺失值进行插值计算:

# Attempt to interpolate the NA values of every column except "Date".
# NOTE(review): this is the failing code from the question, kept unchanged.
column_name <- colnames(vndusd_merged);
lapply(column_name, function(x){
if(x != "Date"){ 
# `x` holds a column name as a string, but `$` does not evaluate its
# right-hand side: `vndusd_merged$x` looks for a column literally named "x".
# The sample data has no such column, so the lookup yields NULL and
# approxfun() receives no points to interpolate between.
# Indexing by name needs `vndusd_merged[[x]]` instead.
interpl <- approxfun(vndusd_merged$Date[!is.na(vndusd_merged$x)], vndusd_merged$x[!is.na(vndusd_merged$x)]);
# This assignment modifies a copy local to the anonymous function; the
# vndusd_merged object outside the function is not updated.
vndusd_merged$x <- interpl(vndusd_merged$Date);  
}
})

我不断收到此错误:

Error in approxfun(vndusd_merged$Date[!is.na(vndusd_merged$x)], vndusd_merged$x[!is.na(vndusd_merged$x)]) : 
need at least two non-NA values to interpolate 
4.
stop("need at least two non-NA values to interpolate") 
3.
approxfun(vndusd_merged$Date[!is.na(vndusd_merged$x)], vndusd_merged$x[!is.na(vndusd_merged$x)]) 
2.
FUN(X[[i]], ...) 
1.
lapply(column_name, function(x) {
if (x != "Date") {
interpl <- approxfun(vndusd_merged$Date[!is.na(vndusd_merged$x)], 
vndusd_merged$x[!is.na(vndusd_merged$x)]) ... 

这是 vndusd_merged 前 20 行的示例。"Date" 列中没有任何 NA 值。

Date Ask.Close Bid.Close
1  01/01/2014     21115     21075
2  02/01/2014     21160     21060
3  03/01/2014     21115     21075
4  04/01/2014        NA        NA
5  05/01/2014        NA        NA
6  06/01/2014     21120     21080
7  07/01/2014     21115     21075
8  08/01/2014     21120     21080
9  09/01/2014     21115     21075
10 10/01/2014     21110     21072
11 11/01/2014        NA        NA
12 12/01/2014        NA        NA
13 13/01/2014     21120     21060
14 14/01/2014     21110     21072
15 15/01/2014     21110     21070
16 16/01/2014     21120     21080
17 17/01/2014     21110     21070
18 18/01/2014        NA        NA
19 19/01/2014        NA        NA
20 20/01/2014     21110     21070

我尝试通过手动插入列名来运行它,但仍然遇到相同的错误。

# Build an interpolation function for Ask.Close from the rows where it is
# observed (not NA). The function is stats::approxfun; the previous spelling
# `aproxfun` does not exist and fails with "could not find function".
interpl <- approxfun(vndusd_merged$Date[!is.na(vndusd_merged$Ask.Close)], vndusd_merged$Ask.Close[!is.na(vndusd_merged$Ask.Close)]);

如何解决这个问题?

您可以使用approx更简洁地执行相同的操作。

# Interpolate every column except the first (Date) with approx(), evaluating
# the interpolant at each row's own Date so the result lines up with the input.
# Date is referenced explicitly instead of through with(): inside with(), a
# data-frame column that happened to be named "x" would shadow the function
# argument `x`. vapply() pins the result to a numeric matrix with one column
# per interpolated input column, whatever the input looks like.
ip <- vapply(
  vndusd_merged[-1],
  function(x) approx(vndusd_merged$Date, x, xout = vndusd_merged$Date)$y,
  numeric(nrow(vndusd_merged))
)
# Re-attach the Date column in front of the interpolated values.
cbind(vndusd_merged[1], ip)
#          Date Ask.Close Bid.Close
# 1  01/01/2014  21115.00  21075.00
# 2  02/01/2014  21160.00  21060.00
# 3  03/01/2014  21115.00  21075.00
# 4  04/01/2014  21116.67  21076.67
# 5  05/01/2014  21118.33  21078.33
# 6  06/01/2014  21120.00  21080.00
# 7  07/01/2014  21115.00  21075.00
# 8  08/01/2014  21120.00  21080.00
# 9  09/01/2014  21115.00  21075.00
# 10 10/01/2014  21110.00  21072.00
# 11 11/01/2014  21113.33  21068.00
# 12 12/01/2014  21116.67  21064.00
# 13 13/01/2014  21120.00  21060.00
# 14 14/01/2014  21110.00  21072.00
# 15 15/01/2014  21110.00  21070.00
# 16 16/01/2014  21120.00  21080.00
# 17 17/01/2014  21110.00  21070.00
# 18 18/01/2014  21110.00  21070.00
# 19 19/01/2014  21110.00  21070.00
# 20 20/01/2014  21110.00  21070.00

数据:

# Reproducible sample data in dput() form: a 20-row data.frame.
#   Date      - factor with 20 levels, "01/01/2014" through "20/01/2014"
#   Ask.Close - integer, NA at rows 4, 5, 11, 12, 18 and 19
#   Bid.Close - integer, NA at the same rows
vndusd_merged <- structure(list(Date = structure(1:20, .Label = c("01/01/2014", 
"02/01/2014", "03/01/2014", "04/01/2014", "05/01/2014", "06/01/2014", 
"07/01/2014", "08/01/2014", "09/01/2014", "10/01/2014", "11/01/2014", 
"12/01/2014", "13/01/2014", "14/01/2014", "15/01/2014", "16/01/2014", 
"17/01/2014", "18/01/2014", "19/01/2014", "20/01/2014"), class = "factor"), 
Ask.Close = c(21115L, 21160L, 21115L, NA, NA, 21120L, 21115L, 
21120L, 21115L, 21110L, NA, NA, 21120L, 21110L, 21110L, 21120L, 
21110L, NA, NA, 21110L), Bid.Close = c(21075L, 21060L, 21075L, 
NA, NA, 21080L, 21075L, 21080L, 21075L, 21072L, NA, NA, 21060L, 
21072L, 21070L, 21080L, 21070L, NA, NA, 21070L)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20"))

以下代码执行问题要求的操作。

# Parse the day/month/year strings into Date objects.
vndusd_merged$Date <- as.Date(vndusd_merged$Date, format = "%d/%m/%Y")

# Replace every column except the first (Date) with its interpolated version.
vndusd_merged[-1] <- lapply(vndusd_merged[-1], function(values) {
  # Fit the interpolant on the observed (non-NA) points only ...
  observed <- !is.na(values)
  interpolate <- approxfun(vndusd_merged$Date[observed], values[observed])
  # ... then evaluate it at every date, which fills the gaps.
  interpolate(vndusd_merged$Date)
})

# Print the result.
vndusd_merged
#         Date Ask.Close Bid.Close
#1  2014-01-01  21115.00  21075.00
#2  2014-01-02  21160.00  21060.00
#3  2014-01-03  21115.00  21075.00
#4  2014-01-04  21116.67  21076.67
#5  2014-01-05  21118.33  21078.33
#6  2014-01-06  21120.00  21080.00
#7  2014-01-07  21115.00  21075.00
#8  2014-01-08  21120.00  21080.00
#9  2014-01-09  21115.00  21075.00
#10 2014-01-10  21110.00  21072.00
#11 2014-01-11  21113.33  21068.00
#12 2014-01-12  21116.67  21064.00
#13 2014-01-13  21120.00  21060.00
#14 2014-01-14  21110.00  21072.00
#15 2014-01-15  21110.00  21070.00
#16 2014-01-16  21120.00  21080.00
#17 2014-01-17  21110.00  21070.00
#18 2014-01-18  21110.00  21070.00
#19 2014-01-19  21110.00  21070.00
#20 2014-01-20  21110.00  21070.00

如果要使用列名向量(在本例中为不等于 "Date" 的列名),请使用上面的代码,但将其应用于数据框的相应子集。

# Select the columns to interpolate by name: every column except "Date".
column_name <- colnames(vndusd_merged)
column_name <- column_name[column_name != "Date"]

# Interpolate each selected column against Date and write the results back.
# The function body must return the interpolated vector: a body that returns
# NULL would make this assignment delete the selected columns.
# NOTE(review): assumes vndusd_merged$Date is already a Date (or another
# numeric-coercible) vector - confirm before running on fresh data.
vndusd_merged[column_name] <- lapply(vndusd_merged[column_name], function(x) {
  # Fit on the observed (non-NA) points only, then evaluate at every date.
  i <- !is.na(x)
  f <- approxfun(vndusd_merged$Date[i], x[i])
  f(vndusd_merged$Date)
})

最新更新