R：根据数值变量的取值范围编码新因子



我正在尝试根据另一列的数字值创建一个因子列。这是我的数据子集:

> dput(sample)
structure(list(ID = c(1683L, 1684L, 1684L, 1684L, 1684L, 1685L, 
1685L, 1685L, 1685L, 1686L, 1686L, 1686L, 1686L, 30759L, 30759L, 
30759L, 30759L, 30760L, 30760L, 30760L, 30760L), Month = structure(c(2L, 
2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 
2L, 3L, 1L, 2L), .Label = c("Jun", "Jul", "Aug"), class = "factor"), 
    Year = c(2018, 2017, 2017, 2018, 2018, 2017, 2017, 2018, 
    2018, 2017, 2017, 2018, 2018, 2017, 2017, 2018, 2018, 2017, 
    2017, 2018, 2018), Homerange = c(NA, 27.2850594918174, NA, 
    NA, NA, NA, 30.52684873837, NA, NA, NA, 30.7069481409563, 
    10.625864752589, 29.2661529202662, 32.3278427642325, NA, 
    NA, NA, NA, 33.8586876862157, NA, NA)), out.attrs = list(
    dim = c(58L, 4L, 2L), dimnames = list(Var1 = c("Var1= 1657", 
    "Var1= 1658", "Var1= 1659", "Var1= 1660", "Var1= 1661", "Var1= 1662", 
    "Var1= 1663", "Var1= 1664", "Var1= 1666", "Var1= 1667", "Var1= 1668", 
    "Var1= 1669", "Var1= 1670", "Var1= 1671", "Var1= 1672", "Var1= 1673", 
    "Var1= 1674", "Var1= 1675", "Var1= 1676", "Var1= 1678", "Var1= 1679", 
    "Var1= 1680", "Var1= 1681", "Var1= 1682", "Var1= 1683", "Var1= 1684", 
    "Var1= 1685", "Var1= 1686", "Var1=30759", "Var1=30760", "Var1=30761", 
    "Var1=30762", "Var1=30763", "Var1=30764", "Var1=30765", "Var1=30766", 
    "Var1=30767", "Var1=30768", "Var1=30769", "Var1=30770", "Var1=30771", 
    "Var1=30772", "Var1=30773", "Var1=30774", "Var1=30775", "Var1=30776", 
    "Var1=30777", "Var1=30778", "Var1=30779", "Var1=30780", "Var1=30781", 
    "Var1=30782", "Var1=30783", "Var1=30784", "Var1=30785", "Var1=30786", 
    "Var1=30787", "Var1=30788"), Var2 = c("Var2=Jun", "Var2=Jul", 
    "Var2=Aug", "Var2=Sep"), Var3 = c("Var3=2017", "Var3=2018"
    ))), row.names = c(315L, 84L, 142L, 258L, 316L, 85L, 143L, 
259L, 317L, 86L, 144L, 260L, 318L, 87L, 145L, 261L, 319L, 88L, 
146L, 262L, 320L), class = "data.frame")

数值列 "ID" 的取值范围为 1659-1685 和 30759-30788。我想创建一个名为 "Type" 的因子列，包含 2 个水平：其中 "V13" 对应 ID 1659-1685，"V16" 对应 ID 30759-30788。我知道自己以前做过这件事，但不知为何想不起来该怎么做了。感谢您的帮助！

假设 ID 1686 不在您给出的范围内（因此不予考虑），可以尝试以下做法：

library(dplyr)
library(forcats)
# Label each row by the ID range it falls in and convert the labels to a
# factor in one step. Rows matching neither range (e.g. ID 1686) get NA
# because `case_when()` has no fallback branch.
# NOTE(review): assumes `df` holds the question's data — confirm.
df %>%
  mutate(
    type = as_factor(
      case_when(
        between(ID, 1659, 1685) ~ "V13",
        between(ID, 30759, 30788) ~ "V16"
      )
    )
  )
# A tibble: 21 x 5
      ID Month  Year Homerange type 
   <int> <fct> <dbl>     <dbl> <fct>
 1  1683 Jul    2018      NA   V13  
 2  1684 Jul    2017      27.3 V13  
 3  1684 Aug    2017      NA   V13  
 4  1684 Jun    2018      NA   V13  
 5  1684 Jul    2018      NA   V13  
 6  1685 Jul    2017      NA   V13  
 7  1685 Aug    2017      30.5 V13  
 8  1685 Jun    2018      NA   V13  
 9  1685 Jul    2018      NA   V13  
10  1686 Jul    2017      NA   NA   
11  1686 Aug    2017      30.7 NA   
12  1686 Jun    2018      10.6 NA   
13  1686 Jul    2018      29.3 NA   
14 30759 Jul    2017      32.3 V16  
15 30759 Aug    2017      NA   V16  
16 30759 Jun    2018      NA   V16  
17 30759 Jul    2018      NA   V16  
18 30760 Jul    2017      NA   V16  
19 30760 Aug    2017      33.9 V16  
20 30760 Jun    2018      NA   V16  
21 30760 Jul    2018      NA   V16 

直接的 base R 解决方案是使用 ifelse：

# Base R: tag each row with the ID range it belongs to and store the result
# as a factor column `Type`. IDs outside both ranges (e.g. 1686) become NA.
# The levels are spelled out so the factor always carries both "V13" and
# "V16", even if one of the ranges happens to be absent from the data;
# letting factor() infer them would silently drop the missing level.
sample <- transform(
  sample,
  Type = factor(
    ifelse(ID %in% 1659:1685, "V13",
           ifelse(ID %in% 30759:30788, "V16", NA_character_)),
    levels = c("V13", "V16")
  )
)

使用 cut（感谢 @Camille 的建议）：

# Using cut() on the integer IDs. The breaks define three intervals:
#   [1659, 1685]   -> "V13"  (include.lowest = TRUE closes the left end)
#   (1685, 30758]  -> gap, labelled NA
#   (30758, 30788] -> "V16"
# The gap has to run up to 30758: ending it at 1686 would label every ID
# from 1687 to 30758 as "V16".
# cut() keeps the NA label as an actual factor level, so the result is
# re-levelled with factor() to turn gap rows into genuine missing values and
# leave exactly the two levels "V13" and "V16".
transform(
  sample,
  Type2 = factor(
    cut(ID, breaks = c(1659, 1685, 30758, 30788), include.lowest = TRUE,
        labels = c("V13", NA, "V16")),
    levels = c("V13", "V16")
  )
)

或使用data.table::inrange

library(data.table)
# Same labelling as the ifelse() approach, but with data.table's `%inrange%`
# range test in place of `%in%` on an integer sequence.
# IDs in neither range (e.g. 1686) become NA. The levels are given explicitly
# so the factor always has both "V13" and "V16", regardless of which ranges
# actually occur in the data.
sample <- transform(
  sample,
  Type = factor(
    ifelse(ID %inrange% c(1659, 1685), "V13",
           ifelse(ID %inrange% c(30759, 30788), "V16", NA_character_)),
    levels = c("V13", "V16")
  )
)

结果如下：

# Inspect the structure of `sample`. The recorded output below shows the new
# `Type` column as a two-level factor ("V13", "V16"), with NA at the position
# of the first ID 1686 row.
str(sample)
# 'data.frame': 21 obs. of  5 variables:
# $ ID       : int  1683 1684 1684 1684 1684 1685 1685 1685 1685 1686 ...
# $ Month    : Factor w/ 3 levels "Jun","Jul","Aug": 2 2 3 1 2 2 3 1 2 2 ...
# $ Year     : num  2018 2017 2017 2018 2018 ...
# $ Homerange: num  NA 27.3 NA NA NA ...
# $ Type     : Factor w/ 2 levels "V13","V16": 1 1 1 1 1 1 1 1 1 NA ...

最新更新