对于这个数据集,我想获得每个个体的最新观测值
ID <- c(1:5)
Age <- c(23,34,26,18,35)
day1 <- c(NA,NA,NA,4,2)
day2 <- c(NA,3,NA,NA,NA)
day3 <- c(2,NA,3,NA,4)
last_value <- c(2,3,3,4,4)
mydata <- data.frame(ID, Age, day1, day2, day3)
ID Age day1 day2 day3 last_value
1 1 23 NA NA 2 2
2 2 34 NA 3 NA 3
3 3 26 NA NA 3 3
4 4 18 4 NA NA 4
5 5 35 2 NA 4 4
使用dplyr
,您可以select()
您需要的列,rev()
将它们反向排列,然后coalesce()
查找每一行的第一个非缺失元素。
library(dplyr)
mydata %>%
mutate(last_value = coalesce(!!!rev(select(., starts_with("day")))))
ID Age day1 day2 day3 last_value
1 1 23 NA NA 2 2
2 2 34 NA 3 NA 3
3 3 26 NA NA 3 3
4 4 18 4 NA NA 4
5 5 35 2 NA 4 4
也与across
+do.call
:
mydata %>%
mutate(last_value = do.call(coalesce, rev(across(starts_with("day")))))
library(tidyverse)
mydata %>%
pivot_longer(-c(ID, Age)) %>%
group_by(ID) %>%
drop_na() %>%
mutate(last_value = last(value)) %>%
pivot_wider(names_from = name, values_from = value)
# A tibble: 5 × 6
# Groups: ID [5]
ID Age last_value day3 day2 day1
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 23 2 2 NA NA
2 2 34 3 NA 3 NA
3 3 26 3 3 NA NA
4 4 18 4 NA NA 4
5 5 35 4 4 NA 2
使用base R
mydata$last_value <- apply(mydata[-(1:2)], 1, (x) tail(x[!is.na(x)], 1))
与产出
> mydata
ID Age day1 day2 day3 last_value
1 1 23 NA NA 2 2
2 2 34 NA 3 NA 3
3 3 26 NA NA 3 3
4 4 18 4 NA NA 4
5 5 35 2 NA 4 4
或使用exec
+coalesce
library(dplyr)
library(purrr)
mydata %>%
mutate(last_value = exec(coalesce, !!! rlang::syms(names(.)[5:3])))