r语言 - 用0填充内部NAs



我的数据框(data frame)有一个日期列和两个数值列,其中包含一些 NA,如下所示:

df
#          Date  a  b
# 1  1990-02-01 NA NA
# 2  1990-03-01 NA NA
# 3  1990-04-01 NA  3
# 4  1990-05-01  1  4
# 5  1990-06-01  2  5
# 6  1990-07-01  3 NA
# 7  1990-08-01  4  7
# 8  1990-09-01  5 NA
# 9  1990-10-01  6  9
# 10 1990-11-01  7 NA
# 11 1990-12-01  8 NA
# 12 1991-01-01  9 NA
# 13 1991-02-01 10 13
# 14 1991-03-01 11 14
# 15 1991-04-01 12 15
# 16 1991-05-01 13 NA

我想保留每列时间序列开始之前出现的 NA(即前导 NA),并把序列开始之后出现的 NA 全部替换为 0。最终结果应该如下所示:

finaldf
#          Date  a  b
# 1  1990-02-01 NA NA
# 2  1990-03-01 NA NA
# 3  1990-04-01 NA  3
# 4  1990-05-01  1  4
# 5  1990-06-01  2  5
# 6  1990-07-01  3  0
# 7  1990-08-01  4  7
# 8  1990-09-01  5  0
# 9  1990-10-01  6  9
# 10 1990-11-01  7  0
# 11 1990-12-01  8  0
# 12 1991-01-01  9  0
# 13 1991-02-01 10 13
# 14 1991-03-01 11 14
# 15 1991-04-01 12 15
# 16 1991-05-01 13  0

是否有某个好用的包提供了可以做到这一点的替换/填充函数?或者你自己会怎么处理这个问题?

数据
# Sample input: 16 monthly dates; `a` has leading NAs only, `b` has leading,
# interior and trailing NAs.
df <- data.frame(
  Date = seq(
    lubridate::ymd("1990-02-01"),
    lubridate::ymd("1991-05-01"),
    by = "1 month"
  ),
  a = c(rep(NA, 3), 1:13),
  b = c(NA, NA, 3, 4, 5, NA, 7, NA, 9, NA, NA, NA, 13, 14, 15, NA)
)

# Expected result: identical to df except that every NA in `b` after its first
# observed value is 0 (the two leading NAs are kept).
finaldf <- df
finaldf$b <- c(NA, NA, 3, 4, 5, 0, 7, 0, 9, 0, 0, 0, 13, 14, 15, 0)

这与 TarJae 的回答很相似,但更灵活一些:

library(dplyr)
df %>%
  mutate(
    across(
      c(a, b),
      function(x) {
        # TRUE from the first non-NA value onwards, FALSE for the leading NAs.
        started <- cumsum(!is.na(x)) > 0
        # Only NAs that occur after the series has started become 0.
        ifelse(started & is.na(x), 0, x)
      }
    )
  )

这返回

Date  a  b
1  1990-02-01 NA NA
2  1990-03-01 NA NA
3  1990-04-01 NA  3
4  1990-05-01  1  4
5  1990-06-01  2  5
6  1990-07-01  3  0
7  1990-08-01  4  7
8  1990-09-01  5  0
9  1990-10-01  6  9
10 1990-11-01  7  0
11 1990-12-01  8  0
12 1991-01-01  9  0
13 1991-02-01 10 13
14 1991-03-01 11 14
15 1991-04-01 12 15
16 1991-05-01 13  0

您可以这样考虑:

# Rebuild the sample data with base R only (as.Date instead of lubridate's
# ymd, which was never loaded in this snippet).
df <- data.frame(
  Date = seq(as.Date("1990-02-01"), as.Date("1991-05-01"), by = "1 month"),
  a = c(rep(NA, 3), 1:13),
  b = c(NA, NA, 3, 4, 5, NA, 7, NA, 9, NA, NA, NA, 13, 14, 15, NA)
)

# Date of the first observed (non-NA) value of b. NAs on or before this date
# are leading NAs and must be kept. If b is entirely NA this is NA, and the
# ifelse() below then leaves every element as NA.
first_obs <- df$Date[which(!is.na(df$b))[1]]

# Replace only the NAs that come after the series has started.
df$b <- ifelse(is.na(df$b) & (df$Date > first_obs), 0, df$b)
df
Date  a  b
1  1990-02-01 NA NA
2  1990-03-01 NA NA
3  1990-04-01 NA  3
4  1990-05-01  1  4
5  1990-06-01  2  5
6  1990-07-01  3  0
7  1990-08-01  4  7
8  1990-09-01  5  0
9  1990-10-01  6  9
10 1990-11-01  7  0
11 1990-12-01  8  0
12 1991-01-01  9  0
13 1991-02-01 10 13
14 1991-03-01 11 14
15 1991-04-01 12 15
16 1991-05-01 13  0

我们可以用 `across` 同时处理 a、b 两列,并结合 `ifelse` 语句:

library(dplyr)
df %>%
  mutate(
    across(
      c(a, b),
      # For each column, the series starts at the date of its first non-NA
      # value. NAs after that date become 0; leading NAs are kept. Computing
      # the start per column avoids hard-coding a row index that only fits
      # one of the columns.
      ~ ifelse(Date > Date[which(!is.na(.))[1]] & is.na(.), 0, .)
    )
  )
Date           a     b
<date>     <int> <dbl>
1 1990-02-01    NA    NA
2 1990-03-01    NA    NA
3 1990-04-01    NA     3
4 1990-05-01     1     4
5 1990-06-01     2     5
6 1990-07-01     3     0
7 1990-08-01     4     7
8 1990-09-01     5     0
9 1990-10-01     6     9
10 1990-11-01     7     0
11 1990-12-01     8     0
12 1991-01-01     9     0
13 1991-02-01    10    13
14 1991-03-01    11    14
15 1991-04-01    12    15
16 1991-05-01    13     0

我们可以先用 `which.min`/`which.max` 这类基础函数确定需要处理的行范围,再用 `replace` 进行替换,不需要额外的包。

# Locate the series start by position, not by value: which.max() on a logical
# vector returns the index of its first TRUE, i.e. the first non-NA entry.
# (which.min(df$b)/which.max(df$b) would return the positions of the smallest
# and largest values, which only works when the data happen to be increasing.)
observed <- !is.na(df$b)
# Guard against an all-NA column, where which.max() would return 1.
if (any(observed)) {
  # Run to the last row so that trailing NAs are filled as well.
  u <- which.max(observed):nrow(df)
  df$b[u] <- replace(df$b[u], is.na(df$b[u]), 0)
}
df
#          Date  a  b
# 1  1990-02-01 NA NA
# 2  1990-03-01 NA NA
# 3  1990-04-01 NA  3
# 4  1990-05-01  1  4
# 5  1990-06-01  2  5
# 6  1990-07-01  3  0
# 7  1990-08-01  4  7
# 8  1990-09-01  5  0
# 9  1990-10-01  6  9
# 10 1990-11-01  7  0
# 11 1990-12-01  8  0
# 12 1991-01-01  9  0
# 13 1991-02-01 10 13
# 14 1991-03-01 11 14
# 15 1991-04-01 12 15
# 16 1991-05-01 13  0

数据:

# Sample data, equivalent to the dput() form: Date is stored as day counts
# since 1970-01-01, `a` is integer and `b` is double.
df <- data.frame(
  Date = structure(
    c(
      7336, 7364, 7395, 7425, 7456, 7486, 7517, 7548,
      7578, 7609, 7639, 7670, 7701, 7729, 7760, 7790
    ),
    class = "Date"
  ),
  a = c(NA, NA, NA, 1:13),
  b = c(NA, NA, 3, 4, 5, NA, 7, NA, 9, NA, NA, NA, 13, 14, 15, NA)
)

`zoo::na.fill` 的第二个参数是一个含 3 个元素的向量,分别用于填充前导、内部和尾部的 NA,因此可以这样写:

library(zoo)
# Option 1: return a filled copy without modifying df. The index -1 and
# df[-1] both exclude the first column (Date). The fill vector c(NA, 0, 0)
# keeps leading NAs and sets interior and trailing NAs to 0.
replace(df, -1, na.fill(df[-1], c(NA, 0, 0)))

# Option 2: the same fill, assigned back into df's non-Date columns in place.
df[-1] <- na.fill(df[-1], c(NA, 0, 0))

相关内容

  • 没有找到相关文章

最新更新