R按行替换最小值的第一个实例



我该怎么做呢?

  1. 将所有小于 6 的值替换为 NA,
  2. 如果某一行中恰好只有一个 NA,则将该行剩余值中最小值的第一次出现替换为 -99?

下面是一些示例数据,包含一个 ID 变量和一个合计列(total):

library(tidyverse)
# Example data: an identifier column plus four numeric measurement columns.
df <- data.frame(
  id = c(1, 2, 3, 4, 5),
  a = c(10, 12, 4, 17, 3),
  b = c(9, 12, 3, 20, 6),
  c = c(2, 2, 10, 10, 10),
  d = c(12, 16, 12, 10, 12)
)
# Row totals over the measurement columns. rowSums() is the vectorised
# equivalent of apply(..., 1, sum), and selecting the columns by name avoids
# depending on their positions in the data frame.
df$total <- rowSums(df[, c("a", "b", "c", "d")])

id  a   b   c   d   total
1   10  9   2   12   33
2   12  12  2   16   42
3   4   3   10  12   29
4   17  20  10  10   57
5   3   6   10  12   31

我期望的输出是

id    a    b     c     d   total
1    10   -99    NA    12    33
2   -99    12    NA    16    42
3    NA    NA    10    12    29
4    17    20    10    10    57
5    NA   -99    10    12    31

我的尝试
df_mod <- df %>%
  # Blank out every value below 6 in columns a through total; all other
  # values (including values that are already NA) pass through unchanged.
  mutate(across(.cols = 'a':'total', ~if_else(.x < 6, NA_real_, .x))) %>%
  # Per-row tally of how many cells in that row are NA.
  rowwise() %>%
  mutate(Count_NA = sum(is.na(cur_data()))) %>%
  ungroup()
# Compute the per-row minimum (ignoring NAs) and attach it to df_mod as a
# column named `min`.
# The first and last columns of df_mod are dropped before transposing (id and
# Count_NA when df_mod is built as above), so `total` still takes part in the
# minimum.
df_mod2 <- t(df_mod[,-c(1,ncol(df_mod))]) %>%
# After t(), each column of the matrix is one original row, so margin 2
# yields one minimum per original row.
apply(., 2, function(a){
# The value of this assignment is what the anonymous function returns.
min <- min(a, na.rm = TRUE)
}
) %>% 
# Append the vector of minima as a new column; the column appears to end up
# named "." (taken from the pipe placeholder), which rename() then turns
# into `min` -- NOTE(review): relies on that naming behaviour, confirm.
cbind(df_mod, .) %>%
rename(., min = .) %>%
tibble(.)

# For rows with exactly one NA (Count_NA == 1), replace cells equal to the
# row minimum with -99, then drop the helper columns Count_NA and min.
# NOTE: under rowwise() each `.x` inside across() is a single cell, so
# match(min, .x) can only ever return position 1. The condition `.x == min`
# is therefore evaluated cell by cell, and EVERY cell tied for the minimum is
# replaced, not just the first one (see the row with id 2 in the output).
df_mod2 %>%
rowwise() %>%
mutate(
across(
.cols = 'a':'total',
~case_when(
# NA cells make `.x == min` NA, so they fall through to the TRUE branch.
Count_NA == 1 & .x == min ~ replace(.x, first(match(min, .x)), -99),
TRUE ~ .x)
)
) %>%
select(-'Count_NA', -'min')

得到的结果如下(第 2 行中 a 和 b 都被替换成了 -99,与期望的输出不符):

id    a    b     c     d   total
1    10   -99    NA    12    33
2   -99   -99    NA    16    42
3    NA    NA    10    12    29
4    17    20    10    10    57
5    NA   -99    10    12    31

感谢

如果你愿意先把数据转换为长格式(pivot),而不是按行(rowwise)处理,那么下面这个解决方案是可行的。

library(dplyr)
library(tidyr)  # pivot_longer() / pivot_wider() come from tidyr, not dplyr

# Reshape to long form, apply the replacement rules within each id, then
# reshape back to the original wide layout.
# The result is still grouped by id; add ungroup() if that matters downstream.
df %>%
  # One row per (id, column) pair; id and total are kept as identifiers.
  pivot_longer(names_to = 'col',
               values_to = 'val',
               -c(id, total)) %>%
  group_by(id) %>%
  mutate(
    # Rank the ORIGINAL values within each id; ties are broken by position,
    # so the earliest of several equal values gets the lower rank.
    val2 = rank(val, ties.method = 'first'),
    # Values below 6 become NA.
    val = ifelse(val < 6, NA, val),
    # With exactly one NA in the group, that NA was the single value below 6
    # and therefore held rank 1; rank 2 is the first occurrence of the
    # smallest remaining value, which is the cell to flag with -99.
    val = ifelse(sum(is.na(val)) == 1 & val2 == 2, -99, val)
  ) %>%
  select(-val2) %>%
  pivot_wider(names_from = col,
              values_from = val) %>%
  relocate(total, .after = "d")

结果如下:

# A tibble: 5 × 6
# Groups:   id [5]
id     a     b     c     d total
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1    10   -99    NA    12    33
2     2   -99    12    NA    16    42
3     3    NA    NA    10    12    29
4     4    17    20    10    10    57
5     5    NA   -99    10    12    31

不太清楚你所说的“第二个”最小值是什么意思,因为最小值已经被你替换掉了。你可以使用 data.table:

library(data.table)

# Columns to transform: everything except the identifier. Defined once so the
# assignment target and .SDcols cannot drift apart (the original snippet
# referenced `cols` without ever defining it).
cols <- setdiff(names(df), "id")

# setDT() converts df to a data.table by reference; := then overwrites the
# selected columns in place.
setDT(df)[
  ,
  (cols) := transpose(
    lapply(
      # Inner step, column-wise: values below 6 become NA. transpose() then
      # turns the list of columns into a list of rows.
      transpose(lapply(.SD, function(x) fifelse(x < 6, NA_real_, x))),
      # Row-wise: when a row has exactly one NA, flag the first occurrence of
      # its smallest remaining value with -99 (which.min() skips NAs and
      # returns the first minimum).
      function(x) if (sum(is.na(x)) == 1) replace(x, which.min(x), -99) else x
    )
  ),
  .SDcols = cols
]

相关内容

  • 没有找到相关文章

最新更新